diff --git a/.gitignore b/.gitignore
index 380e4417..c966ff18 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,8 @@ data/models/sequenceLabelling/*biobert*
 data/models/sequenceLabelling/*-bert-*-en/
 data/models/textClassification/citations*
 
+data/sequenceLabelling/datasets/dataseer_*.json
+
 data/models/sequenceLabelling/*0
 data/models/sequenceLabelling/*1
 data/models/sequenceLabelling/*2
diff --git a/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/config.json b/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/config.json
new file mode 100644
index 00000000..b8224397
--- /dev/null
+++ b/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/config.json
@@ -0,0 +1,26 @@
+{
+    "model_name": "datasets-BidLSTM_CRF-with_ELMo",
+    "architecture": "BidLSTM_CRF",
+    "embeddings_name": "glove-840B",
+    "char_vocab_size": 172,
+    "case_vocab_size": 8,
+    "char_embedding_size": 25,
+    "num_char_lstm_units": 25,
+    "max_char_length": 30,
+    "features_vocabulary_size": 12,
+    "features_indices": null,
+    "features_embedding_size": 4,
+    "features_lstm_units": 4,
+    "max_sequence_length": 1500,
+    "word_embedding_size": 1324,
+    "num_word_lstm_units": 100,
+    "case_embedding_size": 5,
+    "dropout": 0.5,
+    "recurrent_dropout": 0.5,
+    "use_crf": true,
+    "use_chain_crf": false,
+    "fold_number": 1,
+    "batch_size": 20,
+    "transformer_name": null,
+    "use_ELMo": true
+}
\ No newline at end of file
diff --git a/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/model_weights.hdf5 b/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/model_weights.hdf5
new file mode 100644
index 00000000..c91723df
Binary files /dev/null and b/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/model_weights.hdf5 differ
diff --git a/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/preprocessor.json b/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/preprocessor.json
new file mode 100644
index 00000000..d4c5784c
--- /dev/null
+++ b/data/models/sequenceLabelling/datasets-BidLSTM_CRF-with_ELMo/preprocessor.json
@@ -0,0 +1,211 @@
+{
+    "padding": true,
+    "return_lengths": true,
+    "return_word_embeddings": true,
+    "return_casing": false,
+    "return_features": false,
+    "return_chars": true,
+    "return_bert_embeddings": false,
+    "vocab_char": {
+        "<PAD>": 0,
+        "<UNK>": 1,
+        "\"": 2,
+        "#": 3,
+        "$": 4,
+        "%": 5,
+        "&": 6,
+        "'": 7,
+        "(": 8,
+        ")": 9,
+        "*": 10,
+        "+": 11,
+        ",": 12,
+        "-": 13,
+        ".": 14,
+        "/": 15,
+        "0": 16,
+        "1": 17,
+        "2": 18,
+        "3": 19,
+        "4": 20,
+        "5": 21,
+        "6": 22,
+        "7": 23,
+        "8": 24,
+        "9": 25,
+        ":": 26,
+        ";": 27,
+        "<": 28,
+        "=": 29,
+        ">": 30,
+        "?": 31,
+        "@": 32,
+        "A": 33,
+        "B": 34,
+        "C": 35,
+        "D": 36,
+        "E": 37,
+        "F": 38,
+        "G": 39,
+        "H": 40,
+        "I": 41,
+        "J": 42,
+        "K": 43,
+        "L": 44,
+        "M": 45,
+        "N": 46,
+        "O": 47,
+        "P": 48,
+        "Q": 49,
+        "R": 50,
+        "S": 51,
+        "T": 52,
+        "U": 53,
+        "V": 54,
+        "W": 55,
+        "X": 56,
+        "Y": 57,
+        "Z": 58,
+        "[": 59,
+        "\\": 60,
+        "]": 61,
+        "^": 62,
+        "_": 63,
+        "a": 64,
+        "b": 65,
+        "c": 66,
+        "d": 67,
+        "e": 68,
+        "f": 69,
+        "g": 70,
+        "h": 71,
+        "i": 72,
+        "j": 73,
+        "k": 74,
+        "l": 75,
+        "m": 76,
+        "n": 77,
+        "o": 78,
+        "p": 79,
+        "q": 80,
+        "r": 81,
+        "s": 82,
+        "t": 83,
+        "u": 84,
+        "v": 85,
+        "w": 86,
+        "x": 87,
+        "y": 88,
+        "z": 89,
+        "{": 90,
+        "|": 91,
+        "}": 92,
+        "~": 93,
+        "\u00a8": 94,
+        "\u00ae": 95,
+        "\u00b0": 96,
+        "\u00b1": 97,
+        "\u00b4": 98,
+        "\u00b5": 99,
+        "\u00bc": 100,
+        "\u00bd": 101,
+        "\u00c2": 102,
+        "\u00c4": 103,
+        "\u00c5": 104,
+        "\u00c9": 105,
+        "\u00d7": 106,
+        "\u00d8": 107,
+        "\u00df": 108,
+        "\u00e1": 109,
+        "\u00e2": 110,
+        "\u00e3": 111,
+        "\u00e4": 112,
+        "\u00e5": 113,
+        "\u00e9": 114,
+        "\u00ea": 115,
+        "\u00ed": 116,
+        "\u00ef": 117,
+        "\u00f1": 118,
+        "\u00f3": 119,
+        "\u00f4": 120,
+        "\u00f8": 121,
+        "\u00fc": 122,
+        "\u012b": 123,
+        "\u0131": 124,
+        "\u0142": 125,
+        "\u017b": 126,
+        "\u017e": 127,
+        "\u0219": 128,
+        "\u0263": 129,
+        "\u02da": 130,
+        "\u02dd": 131,
+        "\u0387": 132,
+        "\u0394": 133,
+        "\u0398": 134,
+        "\u03a6": 135,
+        "\u03a8": 136,
+        "\u03b1": 137,
+        "\u03b2": 138,
+        "\u03b3": 139,
+        "\u03b4": 140,
+        "\u03b5": 141,
+        "\u03b6": 142,
+        "\u03b8": 143,
+        "\u03ba": 144,
+        "\u03bb": 145,
+        "\u03bc": 146,
+        "\u03bd": 147,
+        "\u03c0": 148,
+        "\u03c1": 149,
+        "\u03c3": 150,
+        "\u03c4": 151,
+        "\u03c5": 152,
+        "\u03c8": 153,
+        "\u03d5": 154,
+        "\u2022": 155,
+        "\u2026": 156,
+        "\u2032": 157,
+        "\u2033": 158,
+        "\u2122": 159,
+        "\u2126": 160,
+        "\u212b": 161,
+        "\u2192": 162,
+        "\u2194": 163,
+        "\u2206": 164,
+        "\u2212": 165,
+        "\u223c": 166,
+        "\u2248": 167,
+        "\u2264": 168,
+        "\u2265": 169,
+        "\u25b3": 170,
+        "\ufffd": 171
+    },
+    "vocab_tag": {
+        "<PAD>": 0,
+        "B-data_device": 1,
+        "B-dataset": 2,
+        "I-data_device": 3,
+        "I-dataset": 4,
+        "O": 5
+    },
+    "vocab_case": [
+        "<PAD>",
+        "numeric",
+        "allLower",
+        "allUpper",
+        "initialUpper",
+        "other",
+        "mainly_numeric",
+        "contains_digit"
+    ],
+    "max_char_length": 30,
+    "feature_preprocessor": null,
+    "indice_tag": {
+        "0": "<PAD>",
+        "1": "B-data_device",
+        "2": "B-dataset",
+        "3": "I-data_device",
+        "4": "I-dataset",
+        "5": "O"
+    }
+}
\ No newline at end of file
diff --git a/data/models/sequenceLabelling/datasets-BidLSTM_CRF/config.json b/data/models/sequenceLabelling/datasets-BidLSTM_CRF/config.json
new file mode 100644
index 00000000..9e59e255
--- /dev/null
+++ b/data/models/sequenceLabelling/datasets-BidLSTM_CRF/config.json
@@ -0,0 +1,26 @@
+{
+    "model_name": "datasets-BidLSTM_CRF",
+    "architecture": "BidLSTM_CRF",
+    "embeddings_name": "glove-840B",
+    "char_vocab_size": 172,
+    "case_vocab_size": 8,
+    "char_embedding_size": 25,
+    "num_char_lstm_units": 25,
+    "max_char_length": 30,
+    "features_vocabulary_size": 12,
+    "features_indices": null,
+    "features_embedding_size": 4,
+    "features_lstm_units": 4,
+    "max_sequence_length": 1500,
+    "word_embedding_size": 300,
+    "num_word_lstm_units": 100,
+    "case_embedding_size": 5,
+    "dropout": 0.5,
+    "recurrent_dropout": 0.5,
+    "use_crf": true,
+    "use_chain_crf": false,
+    "fold_number": 1,
+    "batch_size": 20,
+    "transformer_name": null,
+    "use_ELMo": false
+}
\ No newline at end of file
diff --git a/data/models/sequenceLabelling/datasets-BidLSTM_CRF/model_weights.hdf5 b/data/models/sequenceLabelling/datasets-BidLSTM_CRF/model_weights.hdf5
new file mode 100644
index 00000000..856df7f3
Binary files /dev/null and b/data/models/sequenceLabelling/datasets-BidLSTM_CRF/model_weights.hdf5 differ
diff --git a/data/models/sequenceLabelling/datasets-BidLSTM_CRF/preprocessor.json b/data/models/sequenceLabelling/datasets-BidLSTM_CRF/preprocessor.json
new file mode 100644
index 00000000..d4c5784c
--- /dev/null
+++ b/data/models/sequenceLabelling/datasets-BidLSTM_CRF/preprocessor.json
@@ -0,0 +1,211 @@
+{
+    "padding": true,
+    "return_lengths": true,
+    "return_word_embeddings": true,
+    "return_casing": false,
+    "return_features": false,
+    "return_chars": true,
+    "return_bert_embeddings": false,
+    "vocab_char": {
+        "<PAD>": 0,
+        "<UNK>": 1,
+        "\"": 2,
+        "#": 3,
+        "$": 4,
+        "%": 5,
+        "&": 6,
+        "'": 7,
+        "(": 8,
+        ")": 9,
+        "*": 10,
+        "+": 11,
+        ",": 12,
+        "-": 13,
+        ".": 14,
+        "/": 15,
+        "0": 16,
+        "1": 17,
+        "2": 18,
+        "3": 19,
+        "4": 20,
+        "5": 21,
+        "6": 22,
+        "7": 23,
+        "8": 24,
+        "9": 25,
+        ":": 26,
+        ";": 27,
+        "<": 28,
+        "=": 29,
+        ">": 30,
+        "?": 31,
+        "@": 32,
+        "A": 33,
+        "B": 34,
+        "C": 35,
+        "D": 36,
+        "E": 37,
+        "F": 38,
+        "G": 39,
+        "H": 40,
+        "I": 41,
+        "J": 42,
+        "K": 43,
+        "L": 44,
+        "M": 45,
+        "N": 46,
+        "O": 47,
+        "P": 48,
+        "Q": 49,
+        "R": 50,
+        "S": 51,
+        "T": 52,
+        "U": 53,
+        "V": 54,
+        "W": 55,
+        "X": 56,
+        "Y": 57,
+        "Z": 58,
+        "[": 59,
+        "\\": 60,
+        "]": 61,
+        "^": 62,
+        "_": 63,
+        "a": 64,
+        "b": 65,
+        "c": 66,
+        "d": 67,
+        "e": 68,
+        "f": 69,
+        "g": 70,
+        "h": 71,
+        "i": 72,
+        "j": 73,
+        "k": 74,
+        "l": 75,
+        "m": 76,
+        "n": 77,
+        "o": 78,
+        "p": 79,
+        "q": 80,
+        "r": 81,
+        "s": 82,
+        "t": 83,
+        "u": 84,
+        "v": 85,
+        "w": 86,
+        "x": 87,
+        "y": 88,
+        "z": 89,
+        "{": 90,
+        "|": 91,
+        "}": 92,
+        "~": 93,
+        "\u00a8": 94,
+        "\u00ae": 95,
+        "\u00b0": 96,
+        "\u00b1": 97,
+        "\u00b4": 98,
+        "\u00b5": 99,
+        "\u00bc": 100,
+        "\u00bd": 101,
+        "\u00c2": 102,
+        "\u00c4": 103,
+        "\u00c5": 104,
+        "\u00c9": 105,
+        "\u00d7": 106,
+        "\u00d8": 107,
+        "\u00df": 108,
+        "\u00e1": 109,
+        "\u00e2": 110,
+        "\u00e3": 111,
+        "\u00e4": 112,
+        "\u00e5": 113,
+        "\u00e9": 114,
+        "\u00ea": 115,
+        "\u00ed": 116,
+        "\u00ef": 117,
+        "\u00f1": 118,
+        "\u00f3": 119,
+        "\u00f4": 120,
+        "\u00f8": 121,
+        "\u00fc": 122,
+        "\u012b": 123,
+        "\u0131": 124,
+        "\u0142": 125,
+        "\u017b": 126,
+        "\u017e": 127,
+        "\u0219": 128,
+        "\u0263": 129,
+        "\u02da": 130,
+        "\u02dd": 131,
+        "\u0387": 132,
+        "\u0394": 133,
+        "\u0398": 134,
+        "\u03a6": 135,
+        "\u03a8": 136,
+        "\u03b1": 137,
+        "\u03b2": 138,
+        "\u03b3": 139,
+        "\u03b4": 140,
+        "\u03b5": 141,
+        "\u03b6": 142,
+        "\u03b8": 143,
+        "\u03ba": 144,
+        "\u03bb": 145,
+        "\u03bc": 146,
+        "\u03bd": 147,
+        "\u03c0": 148,
+        "\u03c1": 149,
+        "\u03c3": 150,
+        "\u03c4": 151,
+        "\u03c5": 152,
+        "\u03c8": 153,
+        "\u03d5": 154,
+        "\u2022": 155,
+        "\u2026": 156,
+        "\u2032": 157,
+        "\u2033": 158,
+        "\u2122": 159,
+        "\u2126": 160,
+        "\u212b": 161,
+        "\u2192": 162,
+        "\u2194": 163,
+        "\u2206": 164,
+        "\u2212": 165,
+        "\u223c": 166,
+        "\u2248": 167,
+        "\u2264": 168,
+        "\u2265": 169,
+        "\u25b3": 170,
+        "\ufffd": 171
+    },
+    "vocab_tag": {
+        "<PAD>": 0,
+        "B-data_device": 1,
+        "B-dataset": 2,
+        "I-data_device": 3,
+        "I-dataset": 4,
+        "O": 5
+    },
+    "vocab_case": [
+        "<PAD>",
+        "numeric",
+        "allLower",
+        "allUpper",
+        "initialUpper",
+        "other",
+        "mainly_numeric",
+        "contains_digit"
+    ],
+    "max_char_length": 30,
+    "feature_preprocessor": null,
+    "indice_tag": {
+        "0": "<PAD>",
+        "1": "B-data_device",
+        "2": "B-dataset",
+        "3": "I-data_device",
+        "4": "I-dataset",
+        "5": "O"
+    }
+}
\ No newline at end of file
diff --git a/data/sequenceLabelling/datasets/ner_dataset_recognition_sentences.json b/data/sequenceLabelling/datasets/ner_dataset_recognition_sentences.json
new file mode 100644
index 00000000..d22dff28
--- /dev/null
+++ b/data/sequenceLabelling/datasets/ner_dataset_recognition_sentences.json
@@ -0,0 +1,72983 @@
+{
+    "lang": "en",
+    "level": "sentence",
+    "documents": [
+        {
+            "body_text": [
+                {
+                    "text": "With the close exception of the duration problem in the CAP data sets, PBCMC and BCMC outperformed BMC, sometimes dramatically so.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 69,
+                            "text": "CAP data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To that end, we show the evolutionary process on the la1 data set in , where some interesting observations can be made about their behaviors.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 65,
+                            "text": "la1 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We filter the sessions in our Yandex testing dataset to only include queries for which have at least one result judged as relevant.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 52,
+                            "text": "Yandex testing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, when adding MPIIGaze dataset for gaze prediction on real video data, the accuracy increases to 6.11 \u2022 .",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 38,
+                            "text": "MPIIGaze dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate the utility of convolution kernels for natural language we applied our tree kernel to the problem of parsing the Penn treebank ATIS corpus .",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 153,
+                            "text": "Penn treebank ATIS corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Another related line of work is that of Kolak and Schilit, who studied the problem of identifying quotations (short pieces of text excerpted from other documents) in a large collection of documents, namely the Google Books corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 210,
+                            "end": 229,
+                            "text": "Google Books corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We opt for the ubiquitous query likelihood model , smoothing the unigram language model of individual documents with the unigram language model of the entire collection, using the Dirichlet smoothing scheme : is the MLE of term's probability based on the target collection D, and",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "More specifically, our model obtains significant performance improvements on ViSal and FBMS datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 100,
+                            "text": "ViSal and FBMS datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the properties of the learned features in a controlled setting, the networks were trained on simulated videos generated using the NORB dataset rescaled to 32 \u00d7 32 to reduce training time.",
+                    "annotation_spans": [
+                        {
+                            "start": 142,
+                            "end": 154,
+                            "text": "NORB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A snapshot of images in a Facebook survey conducted for constructing the paintings dataset Evaluation Metrics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Performance of retrieval models on DBpedia knowledge graph and the benchmark in .",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 58,
+                            "text": "DBpedia knowledge graph",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider the CIFAR-10  and SVHN  datasets, and use a ResNet-18  architecture.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 44,
+                            "text": "CIFAR-10  and SVHN  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These references are representative of state-of-the-art for deep and shallow architectures on these data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the data point Y r is the jth point of the ith data set, then Y r is the same as X j (i) based on the earlier notations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It does, however, mean that, at least for the GOV2 collection examined in our experiments, it is usually a good idea to not use the original qrels built from a pool of old systems when evaluating a new ranking technique.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 61,
+                            "text": "GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we compare our proposed recipe generation system with retrieval baselines, which we use to search recipes in the entire test set for fair comparison.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We found that this technique works very well in practice, for the MNIST and NORB datasets (see below).",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 89,
+                            "text": "MNIST and NORB datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use synthetic and real datasets to evaluate the performance of different greedy and lazy greedy algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In \u00a74.1, we have introduced four individual methods to select candidates: entity category (A1) and entity type (A2) from the knowledge base, and table caption (B) and table entities (C) from the table corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 195,
+                            "end": 207,
+                            "text": "table corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For a fair comparison, we only compared to the methods trained solely on the NYU dataset without additional data.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 88,
+                            "text": "NYU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We found that, on average (dashed line),  We used the 65 images in the newtest directory of the CMU+MIT dataset (a-1) using only 50 labeled image patches Alice can reject up to about 20% of the image patches in an image while keeping 80% of the faces in that image (i.e., Alice will reject 20% of the image patches that Bob's full classifier will classify as a face).",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 111,
+                            "text": "CMU+MIT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evolutionary Community Paths in Enron Email Dataset time stamp t\u22121, but also include all the community sets {C t\u22121 , C t\u22122 , C t\u2212g } detected within the last g steps into consideration.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 51,
+                            "text": "Enron Email Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the experiments in this paper are conducted on TREC (Text REtreival Conference) blog 06 and 07 data sets,",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 113,
+                            "text": "TREC (Text REtreival Conference) blog 06 and 07 data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This paper considers the problem of recovering projective camera matrices from collections of fundamental matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "extended the work to give a bound on the error rate of a hypothesis derived from a weighted combination of the source data sets for the specific case of empirical risk minimization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These high costs have hindered the applicability of GP to large-scale datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection is a sizeable crawl of Semantic Web data (BTC-2009) .",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 65,
+                            "text": "BTC-2009",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Quantitative results on basketball dataset modeling offensive players and ball.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These data sets were specifically constructed to compare deep architectures and kernel machines .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides obtaining better compression, it can handle large and dense datasets, and is parameter-free in both theory and practice.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, we randomly issue 300 queries for s",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For PASCAL-VOC, since (a) only 4 classes are unseen in ImageNet 1K, (b) one of the candidate class 'person' has no semantically similar class present in   , we further hold out a subset of training classes as our validation set for tuning hyperparameters.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 14,
+                            "text": "PASCAL-VOC",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 55,
+                            "end": 66,
+                            "text": "ImageNet 1K",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given the large qualitative differences in the data sets presented below, this visual method of analysis was sufficient for evaluating the competing models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The AOR evaluation collections created in the context of the SemSearch initiative used crowdsourcing techniques to create relevance judgements for the entities by asking anony-",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 30,
+                            "text": "AOR evaluation collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Ideally, we would like to have a reliable retrieval performance using CRTER on various datasets with its parameters within a stable safe range.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments show that our framework can generate better scene graphs, achieving the state-of-the-art performance on two benchmark datasets: Visual Relationship Detection and Visual Genome datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 150,
+                            "end": 206,
+                            "text": "Visual Relationship Detection and Visual Genome datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, we adjusted the scale on the car side data set: in this data set the images were too small and without this adjustment the detector would fail to find a significant amount of features.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 61,
+                            "text": "car side data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, each transaction in the dataset is processed to compute the frequency of each member of the set",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let X \u2208 R d\u00d7n be the collection of data points to be searched.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Design (TBD) network  obtains 99.10% accuracy on CLEVR by using ground truth functional programs to train the network, which are not available for natural VQA datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 155,
+                            "end": 167,
+                            "text": "VQA datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 49,
+                            "end": 54,
+                            "text": "CLEVR",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MVP on multi-view face recognition is evaluated on the MultiPIE dataset , which contains 754, 204 images of 337 identities.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 71,
+                            "text": "MultiPIE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we show in  the best ARI values found with each measure and their respective correlation with ARI, for each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For weak generalization training data from all users is used to learn the models, and average NDCG is computed on the withheld test data from same users (see Section 2 for details on NDCG calculation).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In practice, we use \u03c6 to construct ground truth constraint sets of variable length from images in the image train set, ensuring that our generator is trained on constraint sets that are feasible in S. Second, the generator update has an additional term: the constraint critic term that encourages constraint satisfaction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the results on both data sets reveal an interesting trend: the multilayer arc cosin often perform better than their single layer counterparts.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the TREC dataset, the F1-measure is 0.925 for finding ambiguous subtopics and 0.896 for finding multifaceted subtopics.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 19,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They were asked to search the collection of documents using one of two systems to find the exact documents that describe the species of a set of 8 plant specimens after a short training session to help them get acquainted with the system.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following , we use the accuracy of the detector network in Faster R-CNN on the validation set as the evaluation metric.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Caltech101 database  includes 9,144 images from 102 classes (101 common object classes and a background class).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "Caltech101 database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For English-Spanish document (Eng-Spa) and Document-Speech (Doc-Spe) data sets, each data item with a particular class label, e.g., 'israel' or 'bush,' in one data set is given as a query, and the data items of the other data set are retrieved from the aligned space.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 78,
+                            "text": "English-Spanish document (Eng-Spa) and Document-Speech (Doc-Spe) data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on TREC blog datasets show the significant effectiveness of the proposed unified model.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 42,
+                            "text": "TREC blog datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Potsdam dataset 6 consists of 38 high resolution aerial images, which covers an area of 3.42 km 2 , and each aerial image is captured in four channels (NIR, R, G, and blue (B)).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "Potsdam dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our approach on two analogy-based word similarity tasks  and show that despite the considerably shorter training times our models outperform the Skip-gram model from  trained on the same 1.5B-word Wikipedia dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 199,
+                            "end": 226,
+                            "text": "1.5B-word Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data sets consist of the Corel image data set and a collection of Flickr web pages.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 49,
+                            "text": "Corel image data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 56,
+                            "end": 86,
+                            "text": "collection of Flickr web pages",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By conducting an appropriate user study, we calibrate and validate the measure against the TREC 2005 Robust Track test collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 129,
+                            "text": "TREC 2005 Robust Track test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the SphericalMNIST dataset, all methods use the exact same   summarizes the methods' CNN accuracy on all three 360 \u2022 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 29,
+                            "text": "SphericalMNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each result for the text datasets was averaged over 10-fold cross-validation, otherwise a fixed split into training and test sets was used.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 33,
+                            "text": "text datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, let us consider a real question-answer pair from the Answerbag community-based QA collection 2 (we will use it as a running example throughout the rest of the paper):",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 105,
+                            "text": "Answerbag community-based QA collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This method produces indexes that are size-competitive with other space-efficient methods, typically around 7GB for the GOV2 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 120,
+                            "end": 135,
+                            "text": "GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Qualitative evaluations are further performed on Color FERET dataset  and MIT-CBCL face recognition database , where multi-view facial images are available.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 68,
+                            "text": "Color FERET dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 74,
+                            "end": 108,
+                            "text": "MIT-CBCL face recognition database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Face dataset  is a standard dataset used for alternative clustering; it includes images of 20 people in various poses.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "Face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For that, we use the Quote dataset and the #SP and SP centralities.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 34,
+                            "text": "Quote dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Ad-hoc collections and produced a GoP by using the Terrier 1 open source IR system .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 18,
+                            "text": "Ad-hoc collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "3) We construct the first Fine-grained Fashion Landmark Dataset (FFLD) that provides more comprehensive landmark annotations for diverse clothes types.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 63,
+                            "text": "Fine-grained Fashion Landmark Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For very large data sets, stochastic gradient descent convergence time seems to increase sub-linearly with the size of the data set (see experiments on Brown vs Hansard below).",
+                    "annotation_spans": [
+                        {
+                            "start": 152,
+                            "end": 157,
+                            "text": "Brown",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 161,
+                            "end": 168,
+                            "text": "Hansard",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the figure we can see that by using ascending order of features' frequency, the size of representative set decreases for each of original review dataset, which demonstrates the effectiveness of the sorting strategy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Further analysis shows that only 58% of the user-feature pairs are unrelated on this data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, we randomly select 500 vehicle images from the Ve-hicleID dataset and label three vehicle parts with bounding boxes (window, light and brand), and these images are used to train the YOLO model.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 76,
+                            "text": "Ve-hicleID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Advanced regularization techniques, such as recurrent batch normalization  and Zoneout  are able to improve standard RNNs to perform near state-of-the-art on the recent psMNIST benchmark (95.9%).",
+                    "annotation_spans": [
+                        {
+                            "start": 169,
+                            "end": 186,
+                            "text": "psMNIST benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the spatio-temporal nature of datasets used in the various application domains brings intriguing questions regarding co-location pattern analysis.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Its aim is to build test collections in the six chosen Indian languages.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then measured how many instances of all selected categories for each task are present in our datasets, which also largely varies between 7,172 and 34,160 instances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the performance of our application, we have run a simple user study in which the generated images are restricted to a subset of 38 materials from the MERL database that we deemed to be representative of the appearance space of the measured materials.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 175,
+                            "text": "MERL database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The basic idea is to compute a cluster quality measure for a partitioning of the original dataset and compare it with the range of quality measures obtained from a collection of random permutations applied to the dataset to destroy any cluster Downloaded 03/24/20 to 82.173.143.206.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To build the collection, we first group tweets by topic which are then manually annotated according to sentiment polarity and strength.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the artist's line drawings, MIT67 and Places365 databases, the percentage of contour ink pixels over all the RGB pixels in the photographs, is only 7.44%, 8.75% and 8.32%, on average.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 60,
+                            "text": "MIT67 and Places365 databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Zero-shot learning accuracy on all three datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the largest 1000 hosts of the GOV2 dataset are treated as 1000 sources, and the number of sources that contain relevant documents is usually much less than 1000.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 42,
+                            "text": "GOV2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The average customer on the reduced data set provided 127 judgments, with 70 being deemed relevant.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The visual comparisons on human image matting testing dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the DATA II dataset, there was an improvement only for subject S2 using the U-SVM with the U nm and",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 22,
+                            "text": "DATA II dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that SGD-kernel is not shown in  and 5, since our experiments show that the method's testing accuracy is always the portion of the major class for datasets \"w3a\" and \"ijcnn1\", in which the two classes are high unbalanced.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 165,
+                            "text": "w3a",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 172,
+                            "end": 178,
+                            "text": "ijcnn1",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We introduce clarity score to quantify the lack of ambiguity with respect to data sets constructed from the TREC collections and the rank correlation test results demonstrate a strong positive association between the clarity scores and retrieval precisions for queries.",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 124,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Remarkably, we achieve mAP of 75.0% and 18.6% on the two datasets with 3 progressive steps and using respectively only 11 and 34 initial proposals.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the proposed algorithm on five benchmark datasets, including OTB-2013, OTB-2015, VOT-2015, VOT-2016, and Temple color-128.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 81,
+                            "text": "OTB-2013",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 83,
+                            "end": 91,
+                            "text": "OTB-2015",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 93,
+                            "end": 101,
+                            "text": "VOT-2015",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 103,
+                            "end": 111,
+                            "text": "VOT-2016",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 117,
+                            "end": 133,
+                            "text": "Temple color-128",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results for the \"Two Patterns\" dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 48,
+                            "text": "\"Two Patterns\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can therefore even search the rapidly growing collection of game records made by computer Shogi programs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "N U f eat are first trained on ImageNet; N R and N U are then individually fine-tuned on a semantic segmentation dataset, such as Cityscapes .",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 39,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 130,
+                            "end": 140,
+                            "text": "Cityscapes",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is done by maximizing the following discriminative goodness function over the validation set : with respect to the Y and Y parameters, while everything else is kept fixed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also tested the clustering quality of HPStream and HDDStream on the Forest Cover Type Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 96,
+                            "text": "Forest Cover Type Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After this process, our click-based query collection consists of 136,000 queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By retraining the single-frame 3D pose estimator using our automatically-generated dataset, we obtain a more robust network that performs better on real-world (3DPW ) and mocap (HumanEVA ) datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 148,
+                            "end": 197,
+                            "text": "real-world (3DPW ) and mocap (HumanEVA ) datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The CURL generative model trained i.i.d (without MGR, and with dynamic expansion) is competitive with the state-ofthe-art on MNIST (bettered only by VaDE, which incorporates a larger architecture) and Omniglot (bettered only by DirVAE).",
+                    "annotation_spans": [
+                        {
+                            "start": 201,
+                            "end": 209,
+                            "text": "Omniglot",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 125,
+                            "end": 130,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Spam Email data set was released by ECML/PKDD 2006 discovery challenge (www.ecmlpkdd2006.org/challenge.html).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "Spam Email data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "proposed to integrate information from Wikipedia, WordNet, Geo-Name corpus, etc., to build Yago2, an open domain structured knowledge base.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 74,
+                            "text": "Geo-Name corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method achieves significant improvements on the CIFAR-10, Oxford Flowers and CUB Birds datasets over several GAN baselines both qualitatively and quantitatively.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 99,
+                            "text": "CIFAR-10, Oxford Flowers and CUB Birds datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report in  the area under the ROC curve (AUC) and average precision (AP) evaluated on the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i on the document collection sample k",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Gowalla dataset includes a total of 6442890 check-in records generated by 196591 users from February 2009 to October 2010; And the Brightkite dataset includes a total of 4747200 check-in records generated by 51406 users over the period of April 2008 -October 2010.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "Gowalla dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 135,
+                            "end": 153,
+                            "text": "Brightkite dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that our approach outperforms the state-of-the-art by a large margin in retrieval, clustering and re-identification tasks on CUB200-2011, CARS196, Stanford Online Products, Inshop  Clothes and PKU VehicleID datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 133,
+                            "end": 223,
+                            "text": "CUB200-2011, CARS196, Stanford Online Products, Inshop  Clothes and PKU VehicleID datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The learnt representation, finetuned on the UCF-101 dataset, turned out to be 20% less accurate than hand-crafted state-of-the-art trajectory-based representation .",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 59,
+                            "text": "UCF-101 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the case of the TREC WT10G collection, which contains around 1.6 million documents, the depth of the wavelet tree is 24.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 40,
+                            "text": "TREC WT10G collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Partitioning the document collection reduces latency, while replication of services increases throughout .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence, most existing instructional video datasets  focus on a specific domain like cooking or furniture assembling, and  only consists of five tasks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to quantify the detrimental effects of short TTI we examined data from the two subjects in dataset IIa+b from the BCI Competition III .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focus attention on the Graph500 benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 44,
+                            "text": "Graph500 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Expertise retrieval has been largely unexplored on data other than the W3C collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 85,
+                            "text": "W3C collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The exper-imental results reveal that the proposed PFAN exceeds the state-of-the-art performance on three UDA datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 118,
+                            "text": "UDA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Currently, no online/streaming solutions for convex MF are known as it appears hard to satisfy the convexity constraint without having access to the whole dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We continue using the following 2 datasets: Optdigits and MNist.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 53,
+                            "text": "Optdigits",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 58,
+                            "end": 63,
+                            "text": "MNist",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A methodology for simulating ambiguous topics was described and a test collection was built.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NYU dataset provides depth images from three different views, we trained our method both using only frontal view data and using all three views.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "NYU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "originally proposed label smoothing as a strategy that improved the performance of the Inception architecture on the ImageNet dataset, and many state-of-the-art image classification models have incorporated label smoothing into training procedures ever since .",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 133,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "X \u2192 Y * that converts a data set representation X 2 to an intermediate representation g (X 2 ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each of the 3 datasets, we run the amortization model on two types of ranking sequences:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We implement our benchmark using the iNat2017 dataset , an organically collected, crowdsourced compendium of living organisms, with fine-and coarse-grained species distinctions, a heavy-tailed class size distribution, and bounding box annotations for a significant subset.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 53,
+                            "text": "iNat2017 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 3 we provide the details of our supervised learning algorithm and in Section 4 we present results of the algorithm applied to the Kaggle Connectomics dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 141,
+                            "end": 168,
+                            "text": "Kaggle Connectomics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a dataset and a minimum support threshold, DCI CLOSED efficiently performs the mining task using a bounded and predictable amount of memory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show that on all data sets, the Bayesian hierarchical modeling approach has a statistical significant improvement over the regularized linear regression model, which is equivalent to the Bayesian hierarchical models learned at the first iteration.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Comparison between the dictionaries obtained using the Stochastic-LoBCoD method vs. the online methods in  on the MIRFLICKR-1M dataset . .",
+                    "annotation_spans": [
+                        {
+                            "start": 116,
+                            "end": 136,
+                            "text": "MIRFLICKR-1M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the number of samples in the training dataset of N te is often extremely large, e.g. there are over 1.2 million images in the ILSVRC 2012 dataset with file size of 120GB.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 154,
+                            "text": "ILSVRC 2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We next present in  the value of p(\u03b8 S |T ) given the \"Yeast2\" dataset  Error Rate  # Source Datasets Discussion and Parameter Sensitivity.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 70,
+                            "text": "\"Yeast2\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is is changing since the release of large de-identi ed datasets like MIMIC 1 and i2b2 2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 76,
+                            "text": "MIMIC 1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 81,
+                            "end": 87,
+                            "text": "i2b2 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "More importantly, built upon the publicly available ApolloScape dataset , our ApolloCar3D dataset contains multitude of data sources including stereo, camera pose, semantic instance label, per-pixel depth ground truth, and moving videos.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 71,
+                            "text": "ApolloScape dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 78,
+                            "end": 97,
+                            "text": "ApolloCar3D dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate unsupervised clustering performance in terms of how well the discovered clusters reflect known ground truth labels of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This data set contains 581012 observations and each observation consists of 54 attributes, including 10 quantitative variables, 4 binary wilderness areas and 40 binary soil type variables.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These factors limit the benefits from using T S and wT S and from exploiting co-occurrence patterns among pre-assigned tags in that dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "applied Conditional Random Fields (CRFs) , segmenting a 100-image subset of the Corel image database into 7 classes, and segmenting the Sowerby Image Database (104 images) into 8 classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 100,
+                            "text": "Corel image database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 136,
+                            "end": 158,
+                            "text": "Sowerby Image Database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our version of the FERET database contained 1002 frontal face images of 429 individuals taken over a period of a few years under varying lighting conditions.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 33,
+                            "text": "FERET database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "implement only the na\u00efve greedy algorithm, whose complexity is O(N 2 d 2 K), and do not exploit the underlying structure of the problem to accelerate the algorithm; this limits their experiments to datasets with no more than 10 4 comparisons.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On each collection, we evaluate our proposed model by a 10-fold cross-validation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the CIFAR10 dataset, our examples are more realistic than those generated by the best GAN model trained by .",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 22,
+                            "text": "CIFAR10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset (which we call the \"Breast\" dataset) contains 58 expression profiles concerning 3389 genes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Dang and Croft  did the same with TREC Web collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 54,
+                            "text": "TREC Web collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "q. CT R(i |q) relects the \u0142utility\u017e of a listing and can be used for annotation and constructing a benchmark dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We collected 200 real X-rays and randomly selected 200 synthetic X-rays from the training set of the paired LIDC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 120,
+                            "text": "LIDC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Together, these lead to state-of-the-art weakly-supervised detection results on the challenging PASCAL VOC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 114,
+                            "text": "PASCAL VOC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The idea behind DiskMine is that, even if the whole dataset may be large, every projection on single items is likely to be very small.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the articles in Reuters-21578 dataset belong to multiple categories, we build binary classifier for each of the ten most frequent classes to identify the news topic as in .",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 40,
+                            "text": "Reuters-21578 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, although DCCAE and MDcR perform favorably on Caltech101 and handwritten, respectively, it is not promising on other datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 81,
+                            "text": "Caltech101 and handwritten",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When evaluated on the MNIST and NORB datasets, we found that our method achieves lower classification error rates than other feature learning methods, including standard dropout, denoising auto-encoders, and restricted Boltzmann machines.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 45,
+                            "text": "MNIST and NORB datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on several SR benchmarks show that AtlasifySR+E achieves state-of-the-art performance while also remaining language-neutral and using only open, easily accessible data, overcoming two limitations of the current state-of-the-art SR measure.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 36,
+                            "text": "SR benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To gain more insights, we analyze the label level distributions of the generated summary sentences of the 3 systems on TREC 20009 data set, as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 138,
+                            "text": "TREC 20009 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "DukeMTMC-ReID  is an image-based reid dataset generated from DukeMTMC",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 13,
+                            "text": "DukeMTMC-ReID",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, to provide a fair comparison with BERT (section 3.2), we also trained XLNet-Large-wikibooks on BooksCorpus and Wikipedia only, where we reuse all pretraining hyper-parameters as in the original BERT.",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 115,
+                            "text": "BooksCorpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 120,
+                            "end": 129,
+                            "text": "Wikipedia",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After The 50K dataset is a collection of 50K books in German randomly selected from the the IA database.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 21,
+                            "text": "50K dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 103,
+                            "text": "IA database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The above findings confirm our assumption in Section 3.3.2 that improvements can be made by naturally integrating variance to address uncertainty without using collection statistics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "4 on the NCAA dataset are the best) because the added relation modules can aid in refining the importance features.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 21,
+                            "text": "NCAA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With thirty times more testing than training data, the RCV1-bin dataset is a good benchmark for over fitting issues.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 71,
+                            "text": "RCV1-bin dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we focus closely on the results for the \"Animal\" dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 62,
+                            "text": "\"Animal\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each plot corresponds to a given query granularity -day, month, year or the full life time of the respective collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "DS3 considers a source dataset and its goal is to encode the target data according to pairwise dissimilarity between each sample of source and target datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The synthetic datasets used in our experiments are based on model .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider our initial hybrid results as very promising, especially given that the LOD sample data sets used in the test collections were extremely noisy and incomplete (automated data cleaning and entity linking are two prominent research topics in the LOD community, which should hopefully alleviates those issues on the medium term).",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 104,
+                            "text": "LOD sample data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Jeb Bush, Hacker Forums, and Local News datasets were used for the At Home task, in which participants ran their systems on their own platforms, connecting via the internet to the evaluation server, which was run by the track coordinators.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 52,
+                            "text": "Jeb Bush, Hacker Forums, and Local News datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We assume that the test set is also drawn from the same distribution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare CH-FD with several state-of-the-art MIL algorithms on 5 benchmark MIL datasets: 2 Musk datasets  and 3 Image Annotation datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report results on both the Quote and Twitter datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 56,
+                            "text": "Quote and Twitter datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This collection has been employed in the TREC ,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In total the dataset contains 107,146 training and 13,845 test examples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike in the Adult data set, there is a AUC performance gap between Control and Random in the Magic data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 28,
+                            "text": "Adult data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 95,
+                            "end": 109,
+                            "text": "Magic data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first evaluation, based on the LETOR datasets , uses manual relevance assessments as ground-truth labels and synthetic clicks as feedback to BARACO.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 49,
+                            "text": "LETOR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both datasets, the preprocessing of documents 1 http://www.lemurproject.org/",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental comparison is done on the ModelNet dataset , which is a representative large-scale 3D shape repository.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 59,
+                            "text": "ModelNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On a larger experiment on the MNIST dataset, our approach outperforms the distributed variational inference method in , who used a class-conditional density modeling approach.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 43,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One researcher noted I1: \"for me it starts with developing the research questions and data collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2.3, the relevance judgments of almost all test collections are based on one person's interpretation of a topic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate our method on the ImageNet classification dataset  with various network architectures such as ResNet-18, -34 and AlexNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 64,
+                            "text": "ImageNet classification dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, with the suggested setting of D = 20 the PMF model on the full Netflix dataset has over 10 million parameters and is prone to overfitting.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 91,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the Tags data is gathered from the La-belMe 8  and Flickr 9 databases.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 78,
+                            "text": "La-belMe 8  and Flickr 9 databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This poster briefly describes the test collections that will be used for the FIRE 1 workshop.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Under this setting, the Ima-geNet 1K dataset  is selected for performance evaluation, with a source/target class split of 389/611.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 44,
+                            "text": "Ima-geNet 1K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Three datasets Mfeat, Isolet, and Optdigits were prepared exactly the same way as , where only two similar classes were used as inliers.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 20,
+                            "text": "Mfeat",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 22,
+                            "end": 28,
+                            "text": "Isolet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 34,
+                            "end": 43,
+                            "text": "Optdigits",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If these data sets already provided a train/test split we merged them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the category B ClueWeb09 collection, 19 diversity runs were submitted from ten research groups, these were the pool of search outputs used.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 39,
+                            "text": "category B ClueWeb09 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To our best knowledge, it is currently the largest dataset for comprehensive instructional video analysis.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we show that RIM performs better than competing approaches on several real-world data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We employ real and synthetic datasets during our evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this evaluation we utilize the Tweet2011, 2012 and 2013 collections with a total of contains 170 topics.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 70,
+                            "text": "Tweet2011, 2012 and 2013 collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, several algorithms that claim specific abilities are not tested on datasets designed to test these abilities, e.g., QCG  claims better compositional performance, but it is not evaluated on CVQA .",
+                    "annotation_spans": [
+                        {
+                            "start": 202,
+                            "end": 206,
+                            "text": "CVQA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our results on the MNIST and NORB datasets demonstrate that the new pretraining algorithm allows us to learn much better generative models.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 42,
+                            "text": "MNIST and NORB datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare our approach Automatic Frankensteining with the current state of the art for automated machine learning on 80 different data sets and can show that it outperforms them on the majority using the same training time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the aforementioned datasets (ClueWeb09-zh, SogouT-08/12, and etc.) have already gained much success in supporting corresponding researches, they are somewhat dated and small, given the scale of today's Web.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this experiment, we finetune our network using the ground truth plane instances we generated on NYUv2 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 113,
+                            "text": "NYUv2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Images from the van Hateren database were reduced four times from their original size, and 25 \u00d7 25 pixel image patches were randomly extracted (n = 50,000).",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 36,
+                            "text": "Hateren database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is desired to construct a new dataset based on ClueWeb for experiments in federated search.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 57,
+                            "text": "ClueWeb",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first explicit dataset is adom  which contains 1464 ratings by 84 college students for 192 movies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Sphere GAN outperforms recent state-of-the-art methods including IPM-based GAN variants for unsu-pervised image generation problems with CIFAR-10, STL-10, and LSUN bedroom datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 137,
+                            "end": 180,
+                            "text": "CIFAR-10, STL-10, and LSUN bedroom datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We illustrate the effectiveness of the proposed method on a broad spectrum of natural and biomedical datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We discuss the experimental results on a number of real and synthetic data sets in terms of effectiveness and efficiency.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Word embedding based ranking, becomes less competitive on this larger dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While smaller than natural language datasets, further investigation reveals that it covers 88.8% and 70.6% of VQA questions and answers respectively, corroborating its wide diversity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To see the performance of the BP-LR on real world data, we implemented a linear chain CRF on the \"newsgroup FAQ dataset\" 2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 122,
+                            "text": "\"newsgroup FAQ dataset\" 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A number of real data sets from the UCI machine learning repository 2 were used for the testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 69,
+                            "text": "UCI machine learning repository 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our explanation is that, for the RM and RM+Oracle methods, the improvement over the QL model on the Robust2004 dataset is not as significant as them on AP and WSJ datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 118,
+                            "text": "Robust2004 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 152,
+                            "end": 171,
+                            "text": "AP and WSJ datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let P be the total number of pixels in the test set",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We argue that PGEMs are particularly interpretable event models and could be useful for providing insights about the dynamics in an event dataset to political or financial analysts or medical practitioners or scientists; 2) Importantly, we present data-driven algorithms that learn a PGEM from an event dataset without additional user information, unlike the state-of-the-art models; 3)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, sensor networks typically create large amounts of uncertain data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With this setup, we applied SugarCube to two different subsets of the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the MIT-67 dataset, we observe that our algorithm outperforms even the finetuning method, which requires pre-training of the student network on the source task.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 22,
+                            "text": "MIT-67 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider the transformation between real image pairs in Proposal Flow dataset to be non-parametric transformation, as there is no specific parametric geometric model that can be generalized to describe the transformation for any real image pair.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 80,
+                            "text": "Proposal Flow dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This sparsity inducing property lets us to reduce the model size by upto two orders of magnitude as in datasets compared to RMLR.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The gold-standard value of R for the TREC 2012 collection is the estimate produced using the entire set of runs submitted to the Medical Records track.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 57,
+                            "text": "TREC 2012 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On standard datasets (CIFAR-10 and SVHN), we show that the proposed regularizer leads to a significant boost of the robustness of neural networks, comparable to that of adversarial training.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 30,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 35,
+                            "end": 39,
+                            "text": "SVHN",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two most prominent are the earn and acquisition region of the map, reflecting the large proportion of these categories in the Reuters-21578 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 130,
+                            "end": 154,
+                            "text": "Reuters-21578 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let D be a training dataset that is partitioned into m submodels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we extract a small social graph from the last.fm dataset as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 62,
+                            "text": "last.fm dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The datasets Ising Chain, 2D Torus and 3D Torus contain instances that stem from applications in statistical physics .",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 24,
+                            "text": "Ising Chain",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 26,
+                            "end": 34,
+                            "text": "2D Torus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 39,
+                            "end": 47,
+                            "text": "3D Torus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset  through the spherical polyhedron projection of the original datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this work, we focus on MSN-1, the first MSN fold, and Y!S1 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 70,
+                            "text": "MSN-1, the first MSN fold, and Y!S1 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While we have reported the best results (as far as we know) compared to other ranking algorithms on TD2004 and MQ2004 data sets, it should be noted that we did not tune the LamdaMART models for bagging.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 127,
+                            "text": "TD2004 and MQ2004 data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Instead, we develop a training dataset Camera-Display 1M with over one million images and 25 camera-display pairs, to train a neural network to learn the representative CDTF.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 57,
+                            "text": "Camera-Display 1M ",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows an example event dataset with N = 7 events from the event label set L = {A, B, C} over T = 20 days.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "DCI CLOSED just need the original dataset and the tid-lists of the nodes along the path of the depth first visit along the lattice.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next we describe how to integrate the positional index structure into our overall framework for positional query processing over versioned document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Further, we benchmark several state-of-the-art VQA models  on our proposed VQA-Rephrasings dataset to highlight the fragility of VQA models to question rephrasings.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 98,
+                            "text": "VQA-Rephrasings dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Perhaps surprisingly, this result holds non-parametrically, which means that the counterfactual ERs decompose following Thm. 2 for any functional form of the classifier and the underlying causal models where the dataset was generated.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MixD data set is generated to test whether an algorithm can handle data set with clusters of arbitrary sizes and distributions.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "MixD data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , the authors suggest that for homogenous newswire collections a small window is enough to capture useful dependencies, while for large, noisy web collections, a larger span must be set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use train set and validation set on PASCAL VOC 2007 and train set and validation set on PASCAL VOC 2012 as a real-world dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 68,
+                            "text": "PASCAL VOC 2007 and train set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 91,
+                            "end": 106,
+                            "text": "PASCAL VOC 2012",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Structure: We can use the annotations described above as masks over the objects (see  for examples), allowing us to divide the questions in the training set into linguistic pattern groups.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These are implemented in the IBM ProbE T M (Probabilistic Estimation) data mining engine, which is an object-oriented framework for building classes of segmented predictive models from massive training data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To extract anchor text from the ClueWeb09 category B collection, we used the harvestlinks method, which comes with Indri.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 63,
+                            "text": "ClueWeb09 category B collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the performance on fundamental matrix estimation we downloaded kusvod2 3 (24 pairs), Multi-H 4 (5 pairs), and AdelaideRMF 5 (19 pairs) datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 155,
+                            "text": "kusvod2 3 (24 pairs), Multi-H 4 (5 pairs), and AdelaideRMF 5 (19 pairs) datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We created a second test set from the KITTI dataset by choosing a subset of patches for which full ground truth flow was available.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 51,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Upon receiving documents from a selected information source, SSL checks for overlapping documents exist in the sample database.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Controlled experiments on multiple benchmark datasets show strong empirical evidence for the strength of the proposed approach, as it significantly outperformed several state-of-the-art methods, including Rank-SVM, ML-kNN and IBLR-ML (Instance-based Logistic Regression for Multi-label Classification) in most cases.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The model-based method EyeTab performs poorly on three datasets with mean error larger than 20 degrees.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The energy function of the RBM trained on the CIFAR 10 dataset, modelling input pixels with 3 (R,G,B) Gaussian variables , is exactly the one shown in eq. 4; while the RBM trained on MNIST uses logistic units for the pixels and the energy function is again the same as before but without any quadratic term.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 62,
+                            "text": "CIFAR 10 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 183,
+                            "end": 188,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "DOI: http://dx.doi.org  test collections that model high-resource settings fairly well (e.g., retrieval of English-language news broadcasts ), but",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly for the \"PubMedTrain2\" dataset which is more complex due to its larger vocabulary size, senLDA converges around 12 times (an order of magnitude) faster.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 40,
+                            "text": "\"PubMedTrain2\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For a fairer comparison, we trained these methods on our dataset before testing them on the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "4.2, we also augmented the size of the datasets by tilting the camera randomly.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, the corresponding data sets with perturbation level u were denoted by Syn20.D20K.P(u) and Syn25.D20K.P(u)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ClueWeb09 and ClueWeb 12 both provided subsets of corresponding datasets for researchers not yet ready to scale up to 1 billion documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Left: results on the STB dataset  in 20mm-50mm; right: results on Dex-ter+Object dataset  in 0-100mm.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 32,
+                            "text": "STB dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 66,
+                            "end": 88,
+                            "text": "Dex-ter+Object dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We performed a first set of experiments on synthetic signed graphs created from a subset of the USPS digit recognition dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 126,
+                            "text": "USPS digit recognition dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We proposed a method to classify songs in the Million Song Dataset according to song genre.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 66,
+                            "text": "Million Song Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A Gaussian process prior defines a distribution over a real-valued function f (x) if, for any collection has the multivariate Gaussian distribution, where m = {m(xn)} N n=1 and the covariance matrix K is the values of the covariance function evaluated between all pairs of xn, x n \u2032 \u2208 X, i.e. K nn \u2032 =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the former, we used the Fruit dataset  for both algorithms, as shown in  : Comparison between the dictionaries obtained using the Stochastic-LoBCoD method vs. the methods in  and  on the Fruit dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 41,
+                            "text": "Fruit dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 191,
+                            "end": 204,
+                            "text": "Fruit dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(a) and (b) show the reconstruction errors for the training samples for the dedup and MySQL datasets, respectively .",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 100,
+                            "text": "dedup and MySQL datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, the Enron dataset had more trie nodes due to its large number of distinct keywords in the emails.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 24,
+                            "text": "Enron dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As we discussed, this is especially beneficial when information about the entire dataset is inaccessible.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This makes sense because the Mapillary Vista dataset is close to Cityscape in terms of domain similarity, and thus provides better initialization than ImageNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 52,
+                            "text": "Mapillary Vista dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 65,
+                            "end": 74,
+                            "text": "Cityscape",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 151,
+                            "end": 159,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The right panels of figures 2 and 3 show the test set error rates from arc cosine kernels",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, note that if the value ofs computed from the original dataset is larger than all but q \u2212 1 of the m permutation values, the (one-sided) probability of observing a value this extreme in the permutation data is less than q/m.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The rst use case is about the visualization of an existing test collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Popu-lar benchmarks for VQA  have involuntarily encouraged the development of methods that learn and leverage statistical patterns such as biases (i.e. the long-tailed distributions of answers) and question-conditioned biases (which make answers easy to guess given a question without the image).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, test datasets can also be made of multiple latent domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we apply GLG to the application dataset of \u03b4 18 O isotope of Oxygen records in Asia and achieve promising results to discover the moisture transportation patterns in a 800-year period.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, previous studies that apply active learning to retrieval can only evaluate their approaches using moderate-scale collections (such as the 11,000-documents Reuters collections used in  and ), or only focus on the documents retrieved by one query (top 100 documents in  and top 200 in ).",
+                    "annotation_spans": [
+                        {
+                            "start": 163,
+                            "end": 182,
+                            "text": "Reuters collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The hand textures are split so that textures from 14 subjects are used for training and 3 for test and validation sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is due to the fact that both BIC and AIC graphs are estimated using all the n = 800 data points, while the oracle graph is estimated using only the subsampled dataset with size b(n) =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": RMSE for different algorithms on Flixster and Epinion datasets (best viewed in color).",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 64,
+                            "text": "Flixster and Epinion datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One lesson Cleverdon appears to have drawn is the importance of defining relevance judgements in a test collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We proceed by describing the GQA question engine and the four-step dataset construction pipeline: First, we thoroughly clean, normalize, consolidate and augment the Visual Genome scene graphs  linked to each image.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ENZYMES and PROTEINS are sets of proteins from the BRENDA database  and the dataset of Dobson and Doig , respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 66,
+                            "text": "BRENDA database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 7,
+                            "text": "ENZYMES",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 12,
+                            "end": 20,
+                            "text": "PROTEINS",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 76,
+                            "end": 102,
+                            "text": "dataset of Dobson and Doig",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply this algorithm to the Climate Research Unit precipitation dataset spanning 106 years.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 74,
+                            "text": "Climate Research Unit precipitation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then some interesting observations can be made about the corresponding RkNN transaction database for RN N , where each transaction corresponds to a document/item p and stores p's",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 96,
+                            "text": "RkNN transaction database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To stress our robustness in multiple contexts, we additionally evaluate our method on the Angel point clouds from the recent laser scan dataset  (more results are in the supplemetary material due to lack of space).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, it is not easy to apply such a model to datasets composed of diverse objects and background, i.e. different objects in different scenes have different motions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For synthetic data, we repeat each experiment 150 times, each time with a different randomly generated dataset; we report average t k values, as well as standard deviations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this test, we compare the frequency that a user uses the same opinion words to describe a specific feature in the original data set versus that in the permuted data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Proposing a novel deep architecture for capsule networks, termed DeepCaps, that aims at improving the performance of the capsule networks for more complex image datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It also defines a collection of classifiers on R d",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Orbitz hotel review dataset contained 609,884 individual human authored reviews that rated 30,621 hotels.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 31,
+                            "text": "Orbitz hotel review dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The top panel shows our results for the Kaine collection, which consisted of about 400,000 documents from Tim Kaine's eight-year tenure as Governor of Virginia.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 56,
+                            "text": "Kaine collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With the depth given by sensors, normal can be easily calculated via a least square optimization  as used in the widely used NYUv2 dataset , * indicates equal contribution.",
+                    "annotation_spans": [
+                        {
+                            "start": 125,
+                            "end": 138,
+                            "text": "NYUv2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the ILSVRC2013 training set has on average fewer objects per image than the validation set, we use this data as our classification data.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 33,
+                            "text": "ILSVRC2013 training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "provides histogram plots of the AUC values for the binary classification task on each dataset with four different time splits corresponding to the time points when 25%, 50%, 75%, and 100% of events have occurred.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Other parameters, including the feedback interpolation coefficient \u03b1, the two additional parameters \u03c3 and \u03bb in PRM, the passage size, and the passage smoothing parameter in RM3-p, were all tuned on Terabyte05 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 198,
+                            "end": 216,
+                            "text": "Terabyte05 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Overall, NV+SP sets a new state-of-the-art on the CMU dataset and on the challenging sequences of the Aachen and RobotCar datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 19,
+                            "text": "NV+SP sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 50,
+                            "end": 61,
+                            "text": "CMU dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 102,
+                            "end": 130,
+                            "text": "Aachen and RobotCar datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On challenging data sets, we have obtained results that outperform previous SVMs and compare favorably to deep belief nets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Second, from the view point of transfer learning, the supervision knowledge from auxiliary sources can be conveniently incorporated by using \u03b8 learned from the source datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the performance of our method on a large-scale geosocial multimedia network dataset and show that our method achieves the best performance than other state-of-the-art solutions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We submit the former one to the test server for fair comparison with other methods that are trained on KITTI object detection dataset only.",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 133,
+                            "text": "KITTI object detection dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Control points in the second layer were fixed at the center of translation : Impact of training set size on NORB test performance for architectures with two convolutional or affine symnet layers followed by a fully connected layer and then softmax classification.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We introduce techniques to perform exact Gaussian process (GP) training and inference on massive datasets by exploiting a structure that is present in many problems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The historical dataset is needed for computing the stimulation degree sd for each record in the training dataset, because sd is determined by novelty and novelty in turn depends on a user's historical accesses to the items (see below for further details).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given the high accuracy reached on the MNIST dataset largely gives rise to questions concerning saturation, we opted to perform a further evaluation on the more visually diverse SVHN dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 52,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 178,
+                            "end": 190,
+                            "text": "SVHN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The scale of modern datasets allows an unprecedented opportunity to infer individual-level effects by borrowing power across large cohorts; however, principled statistical methods for accomplishing this goal are lacking.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The heterogeneous condensation is capable of handling both static and dynamic data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The 2011 and 2012 evaluations used the Tweets2011 corpus, 1 which consists of an approximately 1% sample (after some spam removal) of tweets from January 23, 2011 to February 7, 2011 (inclusive), totaling approximately 16 million tweets.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 56,
+                            "text": "Tweets2011 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also note that  pretrain the WRN-28-10 feature extractor  to jointly classify all 64 classes in mini-ImageNet meta-training set; then freeze the network during the meta-training.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 130,
+                            "text": "mini-ImageNet meta-training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Identifying latent domains from training datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test our method using the OHSUMED dataset which consists of 196, 000 medical abstracts and 3, 506 relevance labels for 63 queries from the Document Filtering Track from TREC 4.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 44,
+                            "text": "OHSUMED dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 172,
+                            "end": 178,
+                            "text": "TREC 4",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To analyze the effect of each component in our framework, we perform the ablation tests on all the datasets with the follow settings:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare and evaluate the numerical performance of ML-ARL and MaxEnt-ARL for fair classification tasks on the UCI dataset, illumination invariant classification on the Extended Yale B dataset and two fabricated tasks on the CIFAR-10 and CIFAR-100 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 123,
+                            "text": "UCI dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 170,
+                            "end": 193,
+                            "text": "Extended Yale B dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 226,
+                            "end": 257,
+                            "text": "CIFAR-10 and CIFAR-100 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To tackle this problem, we applied our method to learn a nonparametric dictionary from the SUN database , a large dataset comprised of over 130K images, which capture a broad variety of scenes.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 103,
+                            "text": "SUN database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, Yeast dataset is given as an auxiliary source when the target dataset is about lung cancer drugs.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 26,
+                            "text": "Yeast dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, from the training set we removed sessions by users who never switched during the statistics period, similarly as how the test set was created in the challenge setting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate STA on several benchmark datasets of various openness levels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We learn the model from denotations without explicit annotation of logical forms, and evaluate it on a large-scale dataset consisting of 200K dialogs over 12.8M entities.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then used this trained model 5 to predict the gold standard judgments in the four SR datasets discussed above.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 96,
+                            "text": "SR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where f (w, X) denotes the frequency of word x in X, and C is the background collection used to smooth language model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on PASCAL VOC 2012 benchmark demonstrate that the proposed method is effective and achieves the state-of-the-art results.    .",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 50,
+                            "text": "PASCAL VOC 2012 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on three standard datasets: TRECVID MED 11 , KTH Actions  and UCF Sports , and show that LMMC outperforms non-latent MMC and conventional clustering methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 65,
+                            "text": "TRECVID MED 11",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 68,
+                            "end": 79,
+                            "text": "KTH Actions",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 85,
+                            "end": 95,
+                            "text": "UCF Sports",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We empirically study the convergence property of M 2 LID on the Spam Email dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 82,
+                            "text": "Spam Email dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Movie-Lens (10M) and Netflix datasets are used in this study.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 41,
+                            "text": "Movie-Lens (10M) and Netflix datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In other words, the background of this dataset is nearly independent of classes, thus the background is not a discriminative region .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The stimuli are 16\u00d716 images of handwritten digits from the MNIST data set, presented sequentially, one per \"time slice\".",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 74,
+                            "text": "MNIST data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The perplexity score we report is evaluated as exp(\u2212completion log likelihood/N), where N is the total number of words in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we characterize deletes on the raw Tweets2013 collection over a much longer period of time and examine its impact on associated test collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 62,
+                            "text": "Tweets2013 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, the leading singular vector is computed, and then the most correlated sample in the dataset is matched with the computed singular vector.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "wdbc is the Wisconsin Diagnostic Breast Cancer data set, which is from UCI machine learning repository  .",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 102,
+                            "text": "UCI machine learning repository",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 12,
+                            "end": 55,
+                            "text": "Wisconsin Diagnostic Breast Cancer data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 4,
+                            "text": "wdbc",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to get a quantitative idea of the magnitudes of these effects, the method proposed here was applied to the Accidents data set .",
+                    "annotation_spans": [
+                        {
+                            "start": 116,
+                            "end": 134,
+                            "text": "Accidents data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Learning to Rank Challenge data sets, the results show that our method outperforms two strong baselines:",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 36,
+                            "text": "Rank Challenge data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the bias parameters (\u03b1, \u03b2) are shared by all new classes, allowing us to estimate them with a small validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NORB dataset  contains images of 50 different 3D toy objects with 10 objects in each of five generic classes: cars, trucks, planes, animals, and humans.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "NORB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It can be observed that TMSA outperforms the compared methods significantly on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These are two very challenging prediction tasks: even after preprocessing, the datasets are still sparse, and very few repeated observations are available for a particular combination of user, item, feature and  ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The work of [18] used the 2 nd protocol to evaluate their performance in MNIST , FMNIST  and COIL100  datasets, whereas the authors of  and  chose the 1 st protocol on MNIST and CIFAR10  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 168,
+                            "end": 195,
+                            "text": "MNIST and CIFAR10  datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 110,
+                            "text": "MNIST , FMNIST  and COIL100  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Sim was set to 200 for Bayesian MAXQ for Taxi-World and 1000 for Bayesian model-based Q, and to 1000 for both for Resource collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 114,
+                            "end": 133,
+                            "text": "Resource collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our dataset, we learned embeddings for each company.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, there has been a large amount of research on delta (or differential) compression and other redundancy elimination techniques for storage systems and networks (see, e.g., ), and many systems now use such techniques to store their collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The value for each agent type under \"Agents\" corresponds to the average number of instances of that agent per frame of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the \"gray\" attribute classifier fires only on 60% of the \"gray\" samples in the validation set, i.e., TP=0.6, then only 0.6 fraction of the \"elephant\" signature is passed on to the positive (i.e., right) node.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Acronyms: replace an expanded acronym with the shorter acronym, based on the Vera acronym database 2 (e.g. \"United Nations\" \u2192\"UN\").",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 100,
+                            "text": "Vera acronym database 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We now present the performance evaluation results on the data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each sample in our synthetic dataset, in addition to the hand-object image (HO-img) we render two images of the corresponding isolated and unoccluded hand (H-img) or object (O-img).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For unsupervised feature learning of MDRNN, we used unlabeled data of MIR-Flickr database while converting the text features using the new vocabulary from PASCAL database.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 89,
+                            "text": "MIR-Flickr database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 155,
+                            "end": 170,
+                            "text": "PASCAL database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Saat\u00e7i  introduced scalable GP modelling techniques for datasets whose inputs are distributed on a full Cartesian tensor product grid to leverage ecient Kronecker matrix algebra .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On DIGLIB dataset, we only report the results at source level because the document judgments are difficult to make as many sources do not provide their full text information.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 17,
+                            "text": "DIGLIB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Social-IQ answers are longer than previous datasets by nearly a factor of 100% in average length.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Patch Camelyon or PCam dataset  contains 327 680 tiles from two classes, metastatic (tumorous) and non-metastatic tissue.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "Patch Camelyon",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 22,
+                            "end": 34,
+                            "text": "PCam dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested our methods on ten data sets, each with three ranked lists of keywords generated by distinct methods of keyword suggestion.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Assuming that each document has an unknown topic as a hidden variable, O is a sample of opinionated documents of the whole collection, and the prior for opinionated but not relevant documents, i.e. P (O|R), is provided by P (O|R)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Through exploiting a novel Spatio-Temporal Anti-Spoof Network (STASN), we are able to push the performance on public face anti-spoofing datasets over stateof-the-art methods by a large margin.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first data set is the Feigenbaum sequence over the binary alphabet A = {1,2}.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For OER collection, the most straightforward method is to search for the paper title (exact match) in different search engines.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "OER collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For ease of experimentation, we rounded timestamps in both datasets to day granularity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is meaningful to assess how (and whether) the common datasets used in many gaze estimation works satisfy (or violate)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The natural images were taken from the van Hateren database  and were reduced four times from their original size.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 59,
+                            "text": "van Hateren database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The entire data set consisted of about 500, 000 records (about 2.5 Gigabytes) and is too large for most conventional statistical modeling packages.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Meanwhile, Zobel , examined the fairness of top k pooling methods for selecting documents to assess, showing that a pooling depth of 100 appeared to be adequate for search over the TREC-5 test collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 181,
+                            "end": 203,
+                            "text": "TREC-5 test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the ShanghaiTech dataset, two of our approach variants ADCrowdNet(AMG-DME) and ADCrowdNet(AMG-bAttn-DME) achieved better performances than existing approaches.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 27,
+                            "text": "ShanghaiTech dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Percentage of Correct Parts (PCP) is the standard evaluation metric on several benchmarks including the LSP dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 115,
+                            "text": "LSP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Otherwise, in the temporal association mining literature, recent efforts have attempted to capture special temporal profiles of association patterns in market basket transaction datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Additionally, the results show that Authorship is a much better heuristic and can achieve reasonable performance compared with Sequential Pattern Mining although it seems that performance may be dataset dependent.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As part of this work, we provide the first image dataset for reflection removal consisting of the sub-aperture views from the DP sensor.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows results for the various sampling methods on two matrices, one a 2000 2000 Gaussian kernel matrix produced by the Madelon dataset from the NIPS 2003 Workshop on Feature Extraction (madelon kernel), and the other a 4656 3923 scan of the US Declaration of Independence (declaration).",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 134,
+                            "text": "Madelon dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Slim evaluates more candidates than Krimp-datasets for which a minute decrease in minsup leads to an explosion of candidates.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 50,
+                            "text": "Krimp-datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show the results over ROUGE recall and Fmeasure scores on the DUC 2006 dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 78,
+                            "text": "DUC 2006 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This paper addresses the problem of recovering projective camera matrices from collections of fundamental matrices in multiview settings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our LSB-CMM model out-performs the other two methods on the BirdSong database, and its performance is comparable to SIM on the MSRCv2 dataset and to CLPL on the Lost dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 77,
+                            "text": "BirdSong database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 127,
+                            "end": 141,
+                            "text": "MSRCv2 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 161,
+                            "end": 173,
+                            "text": "Lost dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 6, we present numerical results on both simulated and real fMRI datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 83,
+                            "text": "fMRI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To bound the output of the model to the [\u22121, 1] interval, we use tanh as the activation function for the output layer, in the pairwise training setting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After training each network, we sampled 200 objects from the test set in order to evaluate the robustness of each model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset contains 13, 220 videos of 101 action classes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We applied our GSBM+MVPois on IMDb 3 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 44,
+                            "text": "IMDb 3 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows MRR of each method on STUDY and EVAL-UATE testing dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 63,
+                            "text": "STUDY and EVAL-UATE testing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On MOT15 dataset, method most similar to ours is RNN, which also uses an RNN to perform data association.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 16,
+                            "text": "MOT15 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, the dataset containing only pair-wise data is not complex enough to evaluate causal discovery algorithms in real-world scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct the experiments on six real world datasets: DBLP, Facebook, Youtube, Twitter, GooglePlus, and LiveJournal, all of which are available to download at snap.stanford.edu/data.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 59,
+                            "text": "DBLP",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 61,
+                            "end": 69,
+                            "text": "Facebook",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 71,
+                            "end": 78,
+                            "text": "Youtube",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 80,
+                            "end": 87,
+                            "text": "Twitter",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 99,
+                            "text": "GooglePlus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 116,
+                            "text": "LiveJournal",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to evaluate the effectiveness of our quilt detection algorithm at identifying spam web pages, we drew a small sample from the set of detected quilts, and extracted the words of each quilted page and their source documents from the ClueWeb09 corpus (again leveraging DryadLINQ).",
+                    "annotation_spans": [
+                        {
+                            "start": 240,
+                            "end": 256,
+                            "text": "ClueWeb09 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method on both NYU dataset and the Hands2017Challenge dataset, where our method outperforms recent state-of-theart methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 77,
+                            "text": "Hands2017Challenge dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 31,
+                            "end": 42,
+                            "text": "NYU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We applied our supervised DCS-GCS algorithm to three CMU benchmarks, the supervised two-spiral problem, the speaker independent vowel recognition problem and the sonar mine!",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The graph data set includes all the email records of the year 2001 with a total of 54,214 nodes and approximately 1 million edges.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The relative improvements on GOV2 collection are 16.82% and 6.28% over LM and RM, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 44,
+                            "text": "GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this context, the recent work of  is inspiring as it contributes manually annotated relations for the ImageNet video dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 127,
+                            "text": "ImageNet video dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The attribute-labeled images originate from 40, 20, and 707 \"seen\" classes in each dataset, respectively; we use the class labels solely to map to attribute annotations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the execution time performance of our accelerated algorithms over both synthetic and reallife datasets, demonstrating that they significantly outperform na\u00efve implementations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also demonstrated that adding a function of the variance into document ranking provides an alternative way to improve the performance without smoothing from collection data, and the performance of our approach without background smoothing was comparable with that of the state-of-the-art models, namely, Jelinek-Mercer and Dirichlet smoothing language models, and the BM25 model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the utility of our method by applying it to uncalibrated image collections of various sizes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, we apply this computationally efficient stochastic approach to the larger CNN models on ImageNet dataset in Section 3.2.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 110,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We trained our proposed NJFA model on the NIPS 0-12 dataset, which contains the articles from Neural Information Processing Sys-tems (NIPS) conference published between 12 years (during 1988-1999).",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 59,
+                            "text": "NIPS 0-12 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A two level hierarchy of organizational units (faculties and institutes) is available in the UvT Expert collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 114,
+                            "text": "UvT Expert collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the cooking instructions, we tokenize the raw text and remove words that appear less than 10 times in the dataset, and replace them with unknown word token.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reason might be that the real dataset has a small sample size 141 compared with the number of nodes and edges in the causal graph.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe two methodologies for obtaining segmented regression estimators from massive training data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to score queries based on some \"naturalness\" measure, we trained a language model using the Hansard parliament speech corpus (0.76M sentences) and the IMDB movie review dataset (1.22M sentences).",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 133,
+                            "text": "Hansard parliament speech corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 160,
+                            "end": 185,
+                            "text": "IMDB movie review dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For training and evaluating our single image deblurring network, we utilized the same train-test split of the GoPro dataset  as recent deblurring methods  .",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 123,
+                            "text": "GoPro dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, MMC outperforms them on the two largest datasets, TRECVID MED 11 and KTH Actions, and is comparable with them on UCF Sports.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 73,
+                            "text": "TRECVID MED 11",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 78,
+                            "end": 89,
+                            "text": "KTH Actions",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finding opinions from a text data set have been widely studied, and several surveys summarized existing works .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on the TREC GOV collection, which is a 2002 crawl of 1.25M web pages from the .gov domain.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 49,
+                            "text": "TREC GOV collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We surpass the state-of-the-art on the VQA-CP v2 benchmark and demonstrate our approach to be intrinsically more robust to out-of-distribution test data.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 58,
+                            "text": "VQA-CP v2 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "-For fair comparisons with the baselines, on the MovieLens dataset we follow the same prepossessing steps mentioned in .",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 66,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For reasons of efficiency, and without loss of generality, we can hence limit F to the collection of all itemsets in D with a support of at least 1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "OpenSearch is a collection of simple formats for the sharing of search results and the OpenSearch description document format can be used to describe a search engine.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 10,
+                            "text": "OpenSearch",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated gaze estimation accuracy on the MPI-IGaze dataset using leave-one-subject-out cross-validation.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 62,
+                            "text": "MPI-IGaze dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our implementation of the top-k multiclass SVM scales to large datasets like Places 205 with about 2.5 million examples and 205 classes .",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 87,
+                            "text": "Places 205",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2 2 {\u03b8 2 q, and hyperparameter tuning is performed beforehand on the fully structured dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this dataset, we observe check-ins from 111 countries, spawning 669 venue types.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this dataset, we train the model with the training set and evaluate the performance with the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular we used motions {02, 03, 04, 05, 06, 07, 08, 09, 10, 11} of subject 35 in the CMU mocap dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 92,
+                            "end": 109,
+                            "text": "CMU mocap dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we use title and author information from the IA database as follows:",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 61,
+                            "text": "IA database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We make use of the 2016 TREC Contextual Suggestion (TRECCS) dataset, which contains user profiles in the form of relevance-rated documents, and demonstrate the competitiveness of our approach by comparing our system to the best performing systems of the TRECCS task.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 67,
+                            "text": "2016 TREC Contextual Suggestion (TRECCS) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(top left) shows a 4-second synthetic ball video with background texture from the Describable Textures Dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 110,
+                            "text": "Describable Textures Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then we finetune \u03b8 f and the task-specific classification and regression parameters on the target datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Obtain the k-cluster partition P k i of the dataset",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We address the first research question by training various parameters across two standard TREC test collections, in Section 4.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 111,
+                            "text": "TREC test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the pre-trained model on ImageNet as a feature extractor (with all parameters frozen) can reduce the number of parameters when the model is applied to a new dataset, but it leads to bad performance due to the domain shift.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 39,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose an approach to automatically discover latent domains in image or video datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In general, when applying our prediction models on the TREC Session Track 2011 dataset, our results indicate that the better the prediction accuracy, the greater the improvement of retrieval performance over the baseline.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 86,
+                            "text": "TREC Session Track 2011 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For fair comparison, we process the original MVSA datasets based on the same approach used in  to remove noise tweets, in which the textual label and visual label are inconsistent.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 58,
+                            "text": "MVSA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we evaluate multiple state-of-the-art algorithms on both natural and synthetic VQA datasets, and we propose a new algorithm that works well for both.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 97,
+                            "text": "VQA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TIMIT is a speech database that contains two orders of magnitude more training samples than the other datasets, and the largest output label space.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 5,
+                            "text": "TIMIT",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the fact that although DCI CLOSED OOC performs many more I/O operations, it subdivides and prunes the dataset effectively, thus producing very compact and cache-friendly in-core data structures for each partition.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The test collection is still in a formative stage.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "summarizes the quantitative results on the test set (visual comparisons in the supplementary material).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to statistics, the average token numbers of Java and Python datasets have been more than doubled after parsing.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 78,
+                            "text": "Java and Python datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the drosophila promoters data set, similar experiments are conducted, and the results are shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments show that dictionary size can be reduced by 50% or more (less than 0.1% of the collection size) with no significant effect on compression or access speed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GoodNews exhibits important differences to current benchmark datasets for generic captioning like MSCOCO, while it is similar in nature, but about five times larger than BreakingNews, the largest currently available dataset for news image captioning.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 104,
+                            "text": "MSCOCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 170,
+                            "end": 182,
+                            "text": "BreakingNews",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 8,
+                            "text": "GoodNews",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, despite 60 years of extensive research on SG methods, most of the applications focusing on finite datasets, we are not aware of any other SG method that achieves a linear convergence rate while preserving the iteration cost of standard SG methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results of constructing WSA on the tweets in class #dragons for the NPB data set: the construction time of WSA was linear to the number of alphabets.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 90,
+                            "text": "NPB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is provided as a 60/20/20 train/validation/test split.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the third stage, we use the underlying semantic forms to reduce bi-ases in the conditional answer distribution, resulting in a balanced dataset that is more robust against shortcuts and guesses.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train on mini-batches of 32 samples using gray-scale images of approximate size 160x160 pixels randomly cropped from the Berkeley segmentation dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 153,
+                            "text": "Berkeley segmentation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Parameters trained on Legal 2006 dataset when applied to Legal 2007 lead to an MAP of 0.0606, statAP of 0.0168, worse than the no expansion baseline.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 40,
+                            "text": "Legal 2006 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 57,
+                            "end": 67,
+                            "text": "Legal 2007",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For some of the Web collections used for evaluation in Section 4, we also use the PageRank score  of the document, Ppr(d), and the confidence level that the document is not spam, Pspam(d).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The remainder of the dataset is classified as having a softness of 2 (partially sunny skies).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We achieve very competitive performance on the test set: 73.4 IoU score 1 , which is to date the best performance amongst methods that use the same augmented VOC training dataset  (marked as \"VOC extra\" in the table).",
+                    "annotation_spans": [
+                        {
+                            "start": 158,
+                            "end": 178,
+                            "text": "VOC training dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Wikipedia dataset some of the shared topics are similar to the class-specific topics in category \"geography\".",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 24,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, Wikipedia (http://wikipedia.org/) and the web directories contain huge collection of documents discussing various topics of the world.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is divided into the low-res and the high-res scenes, and provides the ground truth depth maps for MVS training.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that the weighted trace-norm regularization indeed yields significant gains on the highly non-uniformly sampled Netflix dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 120,
+                            "end": 135,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We therefore expect that this method would be extremely fast for massive datasets where there are few gaps but in section 5",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to assess the quality of each individual classifier, each one was tested on the NTCIR-TQIC test data set containing 300 unseen queries (75 for each class).",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 113,
+                            "text": "NTCIR-TQIC test data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "with 1 million images) and action detection (e.g., Activ-ityNet v1.3  with 20k videos), most existing instructional video datasets are relatively smaller in scale.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 68,
+                            "text": "Activ-ityNet v1.3",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the classification procedures described in section 3.3, we estimated classification performance on the individual epochs of both data sets by 10fold cross-validation within each subject's data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then automatically determine the real number of permission request for each mobile app based on the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One of the commonest IR system evaluation methodologies is the test collection-based method, referred to as the Cranfield framework .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Pascal Context dataset  is a scene parsing dataset, containing the semantic labels for the entire image, with 4,998 training and 5,105 validation images.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 22,
+                            "text": "Pascal Context dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The figures on the right are from reshaping DSLR and Webcam, of which the \"keyboard\" images are taken in an office environment with various lighting, object poses, and background controlled by the dataset creators .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Noisy Synthetic data set: This is a data set generated by adding noise data points, numbering 10% of the total data points, in the synthetic data set described above.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 24,
+                            "text": "Noisy Synthetic data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experimental results on three benchmark datasets show that they are more effective than several state-of-the-art recommendation methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "our dataset supports, we introduce five new metrics to get further insight into visual reasoning methods and point to missing capabilities we believe coherent reasoning models should possess.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the deterministic method  achieves an error rate of 0.85% and to our knowledge, is the best performing method in this dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, given the scale of the document collections we consider, such data structures do not scale, both with respect to their memory consumption and the processing cost required to exploit them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, the simulated scores are realistic, as they maintain the same distributions and correlations among systems as in the original collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results demonstrate notable performance improvement in state-ofthe-art one-stage detectors based on AP-loss over different kinds of classification-losses on various benchmarks, without changing the network architectures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is because the number of classes in CALTECH-101 is significantly larger than that of the two VOC challenge data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 121,
+                            "text": "VOC challenge data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 41,
+                            "end": 52,
+                            "text": "CALTECH-101",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the FIMI repository we use the Accidents, BMS and Pumsb datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 24,
+                            "text": "FIMI repository",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 36,
+                            "end": 69,
+                            "text": "Accidents, BMS and Pumsb datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All searches were run over the MEDLINE collection of biomedical journal article citations, as published by the US National Library of Medicine.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 49,
+                            "text": "MEDLINE collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After fine-tuning, despite the fact that this activation map has the highest mean NSS score for all regions annotated as text in the dataset, it still favors heads.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, GANs are challenging to evaluate, and a satisfactory generalization measure on a test set to assess overfitting does not yet exist.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In general, we would like to associate a different privacy level with each record in the data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We derive a stochastic gradient descent optimization procedure which allows GPFM to scale linearly to the number of user-item-context, GPFM can thus be used on large-scale industry datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first dataset is EMOd  which consists of 1019 emotional images.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 25,
+                            "text": "EMOd",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, for large sparse datasets, the BCTF model significantly outperformed its MAP counterpart, and in particular, it noticeably outperformed BTF on the Conceptnet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 166,
+                            "end": 184,
+                            "text": "Conceptnet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each query was tested against both TREC collections, and the filtering criteria used was that every word in the query had to appear in at least 10 distinct documents, resulting in a total of 656,172 unique queries for the TREC 7 & 8 collection, and a total of 793,334 unique queries for the TREC WT10G collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 51,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 222,
+                            "end": 243,
+                            "text": "TREC 7 & 8 collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 291,
+                            "end": 312,
+                            "text": "TREC WT10G collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the proposed methodology, for each object o = Io, Fo, Yo in the test and validation sets, we randomly select half of its tags to be included in Io.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further compare on the validation set of KITTI benchmark to other related methods that are not on the benchmark, including bilateral filter using color (Bilateral), fast bilateral (Fast), optimization using total variance (TGV), and deep depth completion for indoor scene .",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 59,
+                            "text": "KITTI benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In total 1863 entries were made on the validation sets during the development period and 135 entries on all test sets for the final competition.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Notice that the metadata is not fully reliable since it is typed by people who scan and/or upload the book in to the IA database.",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 128,
+                            "text": "IA database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper we present, to the best of our knowledge, the first publicly available collection for identifying and ranking triggers of sentiment spikes in Twitter.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, if we simply use Eq. 2 to compute the importance of each feature map, it suffers from heavy computation cost, since we need to compute all the feature maps with respect to the entire training dataset to obtain a comparative generalized result.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our training set does not include any functions from the NAS benchmark suite which we use for evaluating the end-to-end runtimes of our learnt policy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If min{p k } < \u03b8 where \u03b8 is a specified significance threshold, the dataset D exhibits evidence of significant structure.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We believe that, just as static collections often benefit from modeling topics, dynamic collections will likely benefit from temporal modeling of events and time-sensitive user interests and intents, which were rarely addressed in the literature.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All models are trained on the FlyingChair-sOcc dataset with the S short schedule and tested on multiple datasets to assess generalization across datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 54,
+                            "text": "FlyingChair-sOcc dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The most naive way is to combine all the source domains into a single dataset and adapt from this \"mega\" domain to the target domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results on the DBLP dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 37,
+                            "text": "DBLP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the TREC Blogs06 collection , which contains about 3 million postings.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 34,
+                            "text": "TREC Blogs06 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Four zero-shot learning benchmarks are evaluated in our experiments including SUN attribute dataset 1 , Animals with Attributes 2 (AwA2) 2 , Caltech-UCSD Birds 2011 (CUB)  and aPascal-aYahoo (aP&aY)  .",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 99,
+                            "text": "SUN attribute dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 104,
+                            "end": 127,
+                            "text": "Animals with Attributes",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 141,
+                            "end": 164,
+                            "text": "Caltech-UCSD Birds 2011",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 176,
+                            "end": 190,
+                            "text": "aPascal-aYahoo",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the second set of experiments, we evaluate the benefit of feature selection and using unlabeled data on two real-world activity datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In these scenarios, we apply mixed-norm l 1,2 regularization strategy which models the distribution of large-scale datasets such as LSHTC more appropriately as compared to most regularization schemes typically employed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also note that Authorship and Position, two simple heuristics, perform reasonably well and achieve comparatively high F1-Score on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental results have shown the great challenges of our dataset and the effectiveness of the proposed method for step localization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As it was stated, epsilon dataset has 400, 000 training samples, and if we want to run the experiment for 7 epochs on 5 machines with mini-batch size of 128 (T = 21875), based on Table 1, we can calculate the given value for \u03c4 which for our LUPA-SGD is T 2 3 /(pb)",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 33,
+                            "text": "epsilon dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The other is a heterogeneous data set that is a concatenation of ten diverse datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This paper shows that the distinctiveness of the retrieval runs used to construct the first test collection built using EaaS, the TREC 2013 Microblog collection, is not substantially different from that of the TREC-8 ad hoc collection, a high-quality collection built using traditional pooling.",
+                    "annotation_spans": [
+                        {
+                            "start": 130,
+                            "end": 160,
+                            "text": "TREC 2013 Microblog collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 210,
+                            "end": 234,
+                            "text": "TREC-8 ad hoc collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Construct the DynamicGaze dataset, which not only provides another benchmark for evaluating static gaze estimation but benefits the community for studying eye gaze and eye movement dynamics.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 33,
+                            "text": "DynamicGaze dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Fidelity: A summary should have good coverage, meaning that all of the distinct \"concepts\" in the collection have at least one representative in the summary.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments are conducted on TREC and MSN datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 50,
+                            "text": "TREC and MSN datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Another major difference is that previous evaluations, including TDT, TREC Filtering, and Temporal Summarization, merely simulated the streaming nature of the document collection, whereas in RTS the participants were required to build working systems that operated on tweets posted in real time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train our model with K = 1, 2, 3, 4, 8 and 16 learners on the Stanford Online Products dataset  and report the change of the Recall@1 score during training.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 97,
+                            "text": "Stanford Online Products dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In  we plot the results of our method (named RankMatch), for M = R, compared to those achieved by a number of state-of-the-art methods which have published NDCG scores in at least two of the datasets: RankBoost , RankSVM , FRank , ListNet , AdaRank , QBRank , IsoRank , SortNet , StructRank  and C-CRF .",
+                    "annotation_spans": [
+                        {
+                            "start": 201,
+                            "end": 210,
+                            "text": "RankBoost",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 213,
+                            "end": 220,
+                            "text": "RankSVM",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 223,
+                            "end": 228,
+                            "text": "FRank",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 231,
+                            "end": 238,
+                            "text": "ListNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 241,
+                            "end": 248,
+                            "text": "AdaRank",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 270,
+                            "end": 277,
+                            "text": "SortNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 260,
+                            "end": 267,
+                            "text": "IsoRank",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 251,
+                            "end": 257,
+                            "text": "QBRank",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 280,
+                            "end": 290,
+                            "text": "StructRank",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 296,
+                            "end": 301,
+                            "text": "C-CRF",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method across several domains, including synthetic, text, and financial data as well as a large-scale dataset on water stress detection in plants with more than 3 billion matrix entries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "data set with documents from the Internet Movie Database (IMDB).",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 63,
+                            "text": "Internet Movie Database (IMDB)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Filtering those (and noisy data from annotators) out resulted in 28,408 images, which from the basis of our TextVQA dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 123,
+                            "text": "TextVQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we find that if we initialize sampling with images from the test set, images do not move significantly.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, consider , which shows a dataset D and its frequent closed itemsets extracted with min supp = 1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is de ned as where |T | is the number of ratings in the testing dataset,r ui denotes the predicted ratings for T , and r ui is the ground truth.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To analyze the effect of using a different type of features, we evaluated our model on the Breakfast dataset using the improved dense trajectories (IDT) features, which are the standard used features for the Breakfast dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 108,
+                            "text": "Breakfast dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 208,
+                            "end": 225,
+                            "text": "Breakfast dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There is no ground truth available for this dataset, but we use it to demonstrate RIM's efficacy as a data exploration tool.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The same dataset has been used in  as a benchmark dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can see that, despite the number of pages in CNN collection be smaller than the other collections, the number of segment classes found in this collection is relatively high, if compared with those found in other collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the data capture methodology adopted by the recent single-image reflection removal benchmark dataset of , we use different postcards as background and reflection (see ) for the controlled dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "E.g., choosing a POS tag sequence by hand, sampling from a distribution of POS tag sequences seen in the dataset D, or predicting POS tag sequences conditioned on the observed image I. The first one is not scalable.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "* of clusters to consider, a clustering method M, a cluster quality measure Q(\u00b7), and a number m of negative control datasets to generate.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The most effective predictor was T N W P , which was used by the regressor for all four collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This CNN is used to label a large dataset of outdoor panoramas, which is in turn used to train a second CNN, this time to estimate the lighting parameters from a single, limited field of view image.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, for P @5, among all datasets, AttentionXML is at least 4% higher than the second best method",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "HTTP dataset and SMTP dataset have low outlier rates, and Isolation Forest has good performance on them 4 , thus the labeling set is much smaller than training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 12,
+                            "text": "HTTP dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 17,
+                            "end": 29,
+                            "text": "SMTP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "= \u03bb|d| 1\u2212\u03bb \u00b7 n(i,D) |D| , where n(i, D) denotes the number of occurrences of term i in the collection, and |D| is the collection size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ETH dataset contains two scenes with 750 different pedestrians and is split into two sub-datasets (ETH and Hotel).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "ETH dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 103,
+                            "end": 106,
+                            "text": "ETH",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 111,
+                            "end": 116,
+                            "text": "Hotel",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After fitting LDA to the TASA corpus , we ran the particle filter with 1000 particles on the Sederberg and Norman dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 36,
+                            "text": "TASA corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 93,
+                            "end": 121,
+                            "text": "Sederberg and Norman dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Elsayed, using different techniques, similarly found the communication graph to be the best source of evidence (MRR=0.785), but at the cost of the substantially greater computational cost to simultaneously resolve all 1.3 million named references to people in the Enron collection .",
+                    "annotation_spans": [
+                        {
+                            "start": 264,
+                            "end": 280,
+                            "text": "Enron collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The user context in our work is hence addressed by taking into account musical preference and geospatial data, using a standardized collection of listening behavior mined from microblog data .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the MF-URLN on two datasets: the Visual Relationship Detection (VRD)  and the Visual Genome (VG) datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 117,
+                            "text": "the Visual Relationship Detection (VRD)  and the Visual Genome (VG) datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, we used two different training sets that were based on e-mails: the TREC dataset , consisting of 75,419 e-mail messages, and a proprietary (significantly harder) dataset of which we took 100,000 e-mails.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 95,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 5 shows experiment results on GLM model and benchmark methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, it is worth noting that the number of queries with relevant documents on the testing set of FR-12 collection is smaller , as compared to 99 and 100 queries on the AP and TREC-45 sets (we performed   ).",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 117,
+                            "text": "FR-12 collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 172,
+                            "end": 191,
+                            "text": "AP and TREC-45 sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We select only 9 images from each subject (1800 total), and train the network with the reduced dataset for 300 epochs using the batch size of 36.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that our algorithm is much faster than other algorithms on both datasets, especially on RCV1 where we are more than 20 times faster than proximal gradient descent.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 100,
+                            "text": "RCV1",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As mentioned before, we compare TFMAP with FM using the Food dataset, according to the protocol described in Section 5.1.3.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 68,
+                            "text": "Food dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where b > 0 can improve the MRR and MAP on all five collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results reveal that there is a positive effect of \"engineering\" the defense image database to match the task at hand: defenses based on IG-N -Targeted outperforms those based on IG-N -All by a margin of 1%\u2212 4% at the same database size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At last, we conduct comprehensive experiments using a combined dataset containing four caption styles to demonstrate the outstanding performance of our proposed method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data collection started on April 13, 2011 and ended on May 13, 2011, which covered the duration of Singapore General Election 2011 (nomination day on April 27, 2011, and polling day on May 7, 2011).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This external knowledge source is a corpus separate from, but thematically related to, the document collection to be searched.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Closely related to our work, they also observe a correlation between test class sample population and classification accuracy in the Imagenet ZSL benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 133,
+                            "end": 155,
+                            "text": "Imagenet ZSL benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We trained the fHMM-only model on this dataset.      ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all compared models except GRBCM, we partition the training dataset into 16 subsets using the k-means algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the tag recommendation methods on three datasets, each containing the title, tags and description associated with real objects from LastFM, YouTube and Ya-hooVideo.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We randomly select 15 images from the test set, and ask users to select up to 20 distinct ingredients as well as write a recipe that would correspond with the provided image.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we present empirical results on both simulated and real yeast eQTL datasets, which demonstrates the advantages of adaptive multi-task Lasso over many other competitors.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 84,
+                            "text": "yeast eQTL datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Empirical studies conducted using the Bookcrossing dataset show that BReK12 outperforms a number of existing recommenders (developed for general users) in identifying books appealing to K-12 users.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 58,
+                            "text": "Bookcrossing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Again, the approach was only evaluated on the CONLL-TestB data set as well as on a tweet data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 66,
+                            "text": "CONLL-TestB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With the advent of deep learning techniques, various fully automatic colorization methods have been proposed to learn a parametric mapping from grayscale to color using large datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since unsupervised learning considers no labels at all on the target domain, we cannot set hyper-parameters or do best model selection based on a labeled validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the ALOI data set, the ensemble obtained a ROC AUC score of 0.8380, which is in particular also clearly better than the previously reported results on this data set (see ).",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "ALOI data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the cache behavior of PSS1 modeled in Section 4.2 with the Twitter dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 89,
+                            "text": "Twitter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The main problem of KITTI stereo datasets is the small quantity, which is insufficient for training deep learning models.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 41,
+                            "text": "KITTI stereo datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results will be reported on the Tiny Images database , in combination with the CIFAR-10 label set .",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 56,
+                            "text": "Tiny Images database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 83,
+                            "end": 101,
+                            "text": "CIFAR-10 label set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As an illustrative example,  shows the relevance curve derived from the AOL query log dataset using this approach, which we shall also test in our experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 93,
+                            "text": "AOL query log dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the nonversioned NYT data set, only the Sorted method is applicable; we assigned docIDs according to the time stamps of the articles.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 33,
+                            "text": "NYT data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the 10k dataset, most of the networks converge within 24 hours on a NVIDIA GeForce GTX 1080 Ti GPU.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 19,
+                            "text": "10k dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the attribution-based confidence metric reflects the reduced accuracy on the rotated MNIST dataset with a background image .",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 98,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Meeting both these requirements is a prerequisite to the possibility of devising an effective partition strategy able to produce dataset partitions that can be mined respecting a given maximum memory constraint.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the three ad hoc TREC collections and queries contain polymorphic documents and topics from different domains, the OHSUMED collection is more monomorphic which could be seen as documents from a speific technical domain (medicine).",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 39,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 121,
+                            "end": 139,
+                            "text": "OHSUMED collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Data augmentation method is used to preprocess datasets by means of cutting, scaling, rotation, etc.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We selected data sets D from various dimensions (from 10 to 100, 000), and randomly selected 30 queries points Q \u2282 D, and queried them on D \\",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For English-Spanish document data sets, we first selected 1,000 Associated Press (AP) newswire articles published in 1989 from TIPSTER Volume 1 corpus .",
+                    "annotation_spans": [
+                        {
+                            "start": 127,
+                            "end": 150,
+                            "text": "TIPSTER Volume 1 corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 4,
+                            "end": 38,
+                            "text": "English-Spanish document data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is interesting to notice that on WebKB dataset the CPU running time of M 2 DCU is shorter than      : Parameter Sensibility on 20Newsgroup, with different C l .",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 49,
+                            "text": "WebKB dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 130,
+                            "end": 141,
+                            "text": "20Newsgroup",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As illustrated in , accuracy and compression curves are given about 6 networks, which perform on CIFAR-10 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 113,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", S J } denote the J datasets we will be using as training source datasets and let T = {T 1 , T 2 , . . .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally the experiments on UCI data sets and real world Bulletin Board Systems (BBS) data sets show the superiority of our proposed method.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 40,
+                            "text": "UCI data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 56,
+                            "end": 94,
+                            "text": "Bulletin Board Systems (BBS) data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training dataset is used to learn the curiosity function for each user.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "UCF101 is originally an action classification dataset collected from online videos, and a subset of 24 classes with 3,207 videos are provided with the spatiotemporal annotations for action detection.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 6,
+                            "text": "UCF101",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The right panels of  show the test set error rates of MKMs with different kernels and numbers of layers .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly, the robustness of dKVD is not desirable, which is worse than AA on Amazon and Epionions datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 107,
+                            "text": "Amazon and Epionions datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the analysis on these datasets , we can see Yahoo!",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With two benchmark datasets featuring emotional images, EASal exhibits notable improvement on evaluation metrics that indicate relative importance of salient regions within an image (i.e.., NSS, KL, IG), implying that integrating emotion information betters relative saliency prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is especially true for the computer dataset since it only has 13 features and only a few tasks are required to learn the task relatedness structure.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 48,
+                            "text": "computer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each search result in the location dataset, the entropy of the result's location distribution measures how location-specific a single search result is.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "summarizes retrieval performance of Blogger Model and the baseline query expansion methods along with timebased relevance model on the TREC 2009 data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 153,
+                            "text": "TREC 2009 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, on heavy rainy dataset Rain100H , the performance gains by our PRN and PReNet are still significant.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 46,
+                            "text": "Rain100H",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe an improvement between 2 % and 5 % for all data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Such information is also used to prune further the dataset and its partitions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although it should be remembered experimental search engines such as Lemur have been tuned over many years to perform well on test collections with one interpretation per topic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train and evaluate the model, we also propose a new large-scale synthetic dataset, ObMan, with hand-object manipulations.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 91,
+                            "text": "ObMan",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results for thirty individual sub-data sets in the lymphoma data set are available in the supplementary document.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 68,
+                            "text": "lymphoma data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "6.1 Benchmark Data RCV1-V2 text data sets  are popularly used as benchmark in text classification.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 41,
+                            "text": "Benchmark Data RCV1-V2 text data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "reports the performance of the three weak NERs on the two datasets SIN and SGE respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 67,
+                            "end": 70,
+                            "text": "SIN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 75,
+                            "end": 78,
+                            "text": "SGE",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "assume the existence of a collection of images with the same watermark to estimate and remove it.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To show the influence of the sample size, we generate simulated datasets with sample size 128, 256, 512, 1024, 2048, 4096, 8192, and 16384.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments with synthetic and real datasets, we demonstrate the efficiency and the efficacy of our algorithms in a variety of settings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For gpfm with SGD training, we find that a momentum of 0.9 and a fixed learning rate of 0.5 \u00d7 10 \u22124 seems to work well for most datasets 4 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to investigate the effectiveness of each additional component of the proposed work, we carried an ablation study using the MNIST dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 132,
+                            "end": 145,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the performance by measuring the accuracy of assigning the correct scene label to an image on the MIT outdoor scene dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 135,
+                            "text": "MIT outdoor scene dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reason is that the feature selected should be discriminative on the whole data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our study shows that, for the MovieLens dataset with 1 million ratings, the recommendation RMSE (root mean square error) increases from 0.8645 to 0.9 when the co-clustering setting varies from 1 \u00d7 1 to 5 \u00d7 5.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 47,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since there is no such data for learning to imitate professional filming, we collect a new dataset which contains 92 video clips and is group into 6 categories according to the camera motion styles for filming a video.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also demonstrated consistent \"better than uninformed\" prediction performance using four UCI classification datasets-three of which prove to be extremely difficult for other sample selection bias approaches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Within the full ClueWeb dataset, Wikipedia is the main contributor of relevant documents for Web track queries.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 31,
+                            "text": "ClueWeb dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the recommendation model generates a query vector of frame attributes q = (q1, q2, ..., qN g ) (Ng=7), the image ranking module ranks the relevant eyeglasses images from the product database D, where each product image is labeled with frame attributes that annotated as Ng ), 1 \u2264 k \u2264 |D|.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Shape Reconstruction Large-scale synthetic datasets such as ShapeNet  have allowed deep neural networks to be trained on the task of shape reconstruction from single images .",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 68,
+                            "text": "ShapeNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also test the methods using Cell dataset containing a set of measurements such as the area and perimeter biological cells with separate labels marking cells subjected to treatment and control cells.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 43,
+                            "text": "Cell dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that these data sets are extremely high dimensional, and even deterministic data sets cannot be effectively indexed in such cases.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We see in  that the corpus-based predictors outperform, to a statistically significant degree, the Wikipediabased predictors for three out of four collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the proposed method shows superior performance over existing methods on datasets of various scales.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Penn-Fudan dataset (at 200 \u21e5 100 pixels) was then split into 10 train/test cross-validation splits without replacement.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 22,
+                            "text": "Penn-Fudan dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "on the MovieLens 1M dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 27,
+                            "text": "MovieLens 1M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In these challenges, every participant is provided a collection of documents and a set of topics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "8-fold cross-subject average accuracies of different hand gesture methods on the VIVA hand gesture dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 106,
+                            "text": "VIVA hand gesture dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the real-world datasets where the annotator's labelings are much more noisy and diverse than our simulated condition, the proposed Hybrid-MST shows higher robustness to these noisy labelling than others.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Training and testing is performed on different topic sets; for TREC topics, we train and test on different years (train on TERA04, test on TERA05/06; train on TERA05, test on TERA04), and for the Web collection we perform 10-fold cross validation across the whole topic set.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 67,
+                            "text": "TREC",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 123,
+                            "end": 129,
+                            "text": "TERA04",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 139,
+                            "end": 148,
+                            "text": "TERA05/06",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 159,
+                            "end": 165,
+                            "text": "TERA05",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 175,
+                            "end": 181,
+                            "text": "TERA04",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The PAMM Algorithm Input: training data {(X (n) , R (n) , J (n) )} N n=1 , learning rate \u03b7, diversity evaluation measure E",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, all these methods aim to enhance the discriminative power of the learned features, Comparison of SphereFace and UniformFace on the LFW dataset, where points in different colors represent the learned deep features from varying classes, and the numbers on the figure are located at the class centers.",
+                    "annotation_spans": [
+                        {
+                            "start": 140,
+                            "end": 151,
+                            "text": "LFW dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the Caltech-UCSD bird dataset  and computed average accuracy on 100 episodes of 5-way-1-shot classification.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 29,
+                            "text": "Caltech-UCSD bird dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Empirical studies on real-world datasets show that the proposed method can significantly boost the performance of multi-label classification by considering missing labels and incorporating label correlations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, SR # requires slightly more memory than MSR because it requires to store D k .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The KITTI dataset is stratified into easy, moderate, and hard difficulties, and the official KITTI leaderboard is ranked by performance on moderate.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An example for Bayesian Neural Networks on a synthetic regression dataset is given in Appendix F.1, where we illustrate the quality of SLANG's posterior covariance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If we directly use existing multi-label learning methods on such datasets, the missing labels in the training data will be treated as negative examples, and the performances of multi-label classification will degenerate greatly due to the simple treatment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments on the Web dataset were conducted on a distributed system and the Distributed RLSI was implemented with SCOPE language .",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 34,
+                            "text": "Web dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The package-similarity based retrieval used in their framework was not feasible on the Google Landmarks datasets, which has 1.2 million images.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 112,
+                            "text": "Google Landmarks datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The method sequentially processes single data sample and updates a running version of a collection of constant-size sets of representative samples of the clusters, needed for convex interpretations of each basis element.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the ROC curve for the TD2004 dataset, informational user model with the DBN model used for generation.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 42,
+                            "text": "TD2004 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is demo application enables the user to interact with the pooling method with ease, and develops visual hints in order to analyze existing test collections, and build be er ones.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A summary of the recognition results of the experiments and the simulations using the MNIST dataset is shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 99,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are three main steps to our approach: (1) learning a weakly-supervised region proposal network (W-RPN) on video collection V ;",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Y is the permission request that mobile app x i should follow, The training mobile app dataset includes the mobile apps whose permission request patterns and functionalities are manually labeled by mobile app experts.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the 2013Jan dataset, we instead crawled the 'Bursting Phrases' service of Bitly's Data API at 30 minute intervals, since Bitly updates its bursting phrases more frequently than Google Trends.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 23,
+                            "text": "2013Jan dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is split into the same training, validation and evaluation sets as previous works .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To compare the ability to handle missing attributes in training data of our algorithm with other methods, we perform some experiments on the well known Oil Flow dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 152,
+                            "end": 168,
+                            "text": "Oil Flow dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The CiteSeer dataset consists of 3312 scientific publications classified into one of six classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "CiteSeer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Scalability: Computing the similarity matrix for N users in an M -item collection is an O(N 2 M ) problem.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Table 4 (left) shows the validation performance improvement across all multi-task implementations and the singletask STAN implementation, plotted relative to the performance of the vanilla single-task learning on the CityScapes dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 217,
+                            "end": 235,
+                            "text": "CityScapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "it is much more robust than other defenses with this phenomenon on CIFAR-10 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 67,
+                            "end": 83,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on Synthetic Dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NTCIR-11 Wikipedia benchmark  includes 100 queries for measuring specific-item retrieval performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 32,
+                            "text": "NTCIR-11 Wikipedia benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on the CIFAR-10  and ImageNet datasets show that our CI-BCNN outperforms most state-of-the-art binary neural networks by a large margin across various network architectures.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 50,
+                            "text": "CIFAR-10  and ImageNet datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first conduct a dimension reduction for the whole dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "works is the bibliographic network constructed from the D-BLP database  , which is illustrated in .",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 70,
+                            "text": "D-BLP database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "gives the number of documents (N ), number of unique terms (M ), aver-age document length and number of test queries for the collections we retained: ROBUST (TREC), TREC3,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Generalization performance is measured on test set episodes, where S, Q are now sampled from a corpus C test containing classes distinct from those used in C train .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Four input images from our CAPTCHA corpus, along with the final results and convergence trajectory of typical inference runs.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 41,
+                            "text": "CAPTCHA corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We do not include the caching technique in SMO or our SFW implementations for the purpose of fair comparisons, since its performance varies for different datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These datasets together with SogouT-16 allow researchers to investigate a wide range of topics in information science and language technologies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consider a dataset with two plays, each with two agents in an \"arbitrary agent order\": The superscript in parenthesis indicates the play number, and the ordering index is paired together with the agent location in brackets, i.e., x 1 is index 1 and x 2 is index 2 for both examples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments on small datasets for problems including regression, classification, Log Gaussian Cox processes, and warped GPs  show that SAVIGP can perform as well as the full method under high levels of sparsity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All of these datasets measure cancer survival using gene expression levels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The entire TREC dataset was used for evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 23,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The quality of the reconstruction is evaluated by measuring how well it predicts the reprojection of the remaining points in the dataset as well as their 3D reconstruction, once again registered to the ground truth through a homography.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In what follows, all parameter tuning is performed on the TIMIT training set (divided into independent training and development sets), and all error rates are measured on the main test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 76,
+                            "text": "TIMIT training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the same approach as the real data sets in order to add uncertainty to the base data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Detailed statistics about the dataset are shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The performance of the proposed framework under ZSL and GZSL settings for the HMDB51 dataset using the transferred attributes (denoted by m) and different features is reported in Tab.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 92,
+                            "text": "HMDB51 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also conduct thorough experiments on our newly collected dataset to quantitatively evaluate the performance of our method with respect to viewpoint and body pose changes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let D be a dataset that consists of input feature vectors X = {x 1 , . . . ,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We took NTCIR-7 data (1.8M parallel sentences) from the years 1993-2000 for SMT training and the NTCIR-8 test collection (2k sentences) for parameter tuning.",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 120,
+                            "text": "NTCIR-8 test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reason is that they are discriminative w.r.t. image rotations and do not have a good generalization ability in a rotated dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As this set contains longer time-scale activities, the data is segmented into 1 minute chunks and 321 different features are computed, similar to the first dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "No generalized prediction performance needs to be considered because the input dataset contains all available observations, not a set of samples, and thus there is no i.i.d. assumption.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both NetHEPT and DBLP datasets are co-authorship networks; the Email-Enron dataset is an email communication network; and the Epinions dataset is a trust social network.",
+                    "annotation_spans": [
+                        {
+                            "start": 5,
+                            "end": 30,
+                            "text": "NetHEPT and DBLP datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 63,
+                            "end": 82,
+                            "text": "Email-Enron dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 126,
+                            "end": 142,
+                            "text": "Epinions dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "NTU-RGB+D. NTU-RGB+D, containing 56, 880 skeleton action sequences completed by one or two performers and categorized into 60 classes, is one of the largest data sets for skeleton-based action recognition .",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 20,
+                            "text": "NTU-RGB+D",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is a useful property since it allows us to use a vanilla model to partially defend against adversarial examples when we are not able train our own classifier on quasi-natural images due to limitations such as access to the entire dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the query set equals the reference set (Q = R, and we denote D as the common dataset and N = |D|",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first data set is the INRIA Holidays data set 1 , which has been widely used for benchmark evaluation of image retrieval performance .",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 49,
+                            "text": "INRIA Holidays data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recently, state-of-the-art learning-based methods, including convolutional neural networks (CNNs), have consistently outperformed statistical methods when validated on specific datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By leveraging powerful ConvNet feature representations trained on ImageNet and adapted on the large amount of segmented training data available in COCO, we are able to beat the state of the art in object proposals generation under multiple scenarios.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 74,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 147,
+                            "end": 151,
+                            "text": "COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, as shown in  for the Web data set, as \u03b3 increases, the number of outlier clusters with comparatively very large discrepancy decreases.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 46,
+                            "text": "Web data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MIR Flickr collection  consists of 250,00 images, and the original tag data contributed by users.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "MIR Flickr collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe the proposed overall approach for POI change detection in Section 4 and benchmarks different methods and options in Section 5.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The set of views of a face from all possible viewpoints is an extremely high-dimensional data set when represented as image arrays in a computer or on a retina; for example, 32 x 32 pixel grey-scale images can be thought of as points in a 1 ,024-dimensional observation space.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the critical difference diagrams when the results are broken down into the original UCR data sets and those provided by us.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 103,
+                            "text": "UCR data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we use SVD as our base algorithm, since it represents a family of factorization-based methods that work well on the Netflix dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 131,
+                            "end": 146,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Visual tracking has witnessed a rapid boost in the last decade due to the construction of new benchmark datasets  and improved methodologies .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We are also given a video collection V = {V 1 , . . . , V M }.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The page titles of those templates linking to the category or some subcategory are obtained from the local Wikipedia database, which results in 272 different cleanup tags.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 125,
+                            "text": "Wikipedia database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is hoped that in the future the proper distribution can be estimated for | either through the search behavioral data or cross-validations among a diverse collection of multiple spellers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the pre-defined test-train split from ModelNet, all trained networks achieved accuracy within 4% of the reported accuracy on the test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 52,
+                            "text": "ModelNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset contains 60 short sequences with various challenges.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the other approach refines the original query by expanding it with selected key concepts (i.e., bigrams or phrases) from the query patent using the global analysis of the patent collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After filtering invalid responses, we collected 900 questionnaires, i.e., 150 for each model on each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further validate the effectiveness of our proposed feature-level DA methods on standard UDA benchmarks, namely digits and traffic signs",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 105,
+                            "text": "UDA benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show the tanh and sigmoid functions implemented using the proposed WIStanh function where the FSM was trained on the Penn Treebank dataset  when performing the character-language modeling task.",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 138,
+                            "text": "Penn Treebank dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second is the fact that some of the datasets employed have globular clusters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on the Wiki dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 27,
+                            "text": "Wiki dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, the experiments demonstrate that the leading SOTA models score lower than the baselines (for this metric, lower is better), indicating increased capacity in fitting more subtle trends of the dataset's distribution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset used for these experiments is already larger than those used for published results for genre and artist classification.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We downloaded the data set from http://www.cs.ucr.edu/~wli/",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed framework is evaluated through extensive experiments on the topic collection as well as a 5-year (2012-16)",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 89,
+                            "text": "topic collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SBD dataset , for evaluating object segmentation techniques, is divided into a training set of 8,498 images and a validation set of 2,820 images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "SBD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider training with the first 100 (alphabetically) categories of the ILSVRC2013 detection validation set (on val1) and report mean average precision (mAP) over the 100 trained on and 100 held out categories (on val2).",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 110,
+                            "text": "ILSVRC2013 detection validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The inputs are the dataset N and the number of levels",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When researching the presence of duplicate documents in web collections, Bernstein and Zobel  pointed out that a number of duplicates in two web test collections (TREC's GOV1 and GOV2) had been \"inconsistently classified\" by relevance assessors.",
+                    "annotation_spans": [
+                        {
+                            "start": 163,
+                            "end": 183,
+                            "text": "TREC's GOV1 and GOV2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset consists of the following road agents: cars, buses, trucks, rickshaws, pedestrians, scooters, motorcycles, carts, and animals and is collected in dense Asian cities.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To generate the text corpus, the model generates the instances of every word type together with their contexts in turn.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section we show the drought regions detected by the algorithm using the CRU dataset, which has monthly precipitation from the years 1901-2006.",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 91,
+                            "text": "CRU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They did not address the question detection subtask in the work and their approach is a complicated method that may not be applied to larger datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we evaluate on real data, the Standford Drone Dataset (SDD), where groundtruth distributions are not available and the evaluation must be based on a single ground-truth sample of the true distribution.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 66,
+                            "text": "Standford Drone Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "from the collection of calibration RGBD images and poses {\u012a n calib ,\u03ba n calib }, we select one that best resembles the target pose \u03ba in the viewpoint",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the eleven directors mentioned in our IMDb dataset, we selected their 73 movies directed by them from year 2000 to 2010.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 55,
+                            "text": "IMDb dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the performance of our method using the dataset collected from the well-known geosocial multimedia network Gowalla and Flickr, and then show that our method achieves the best performance than other state-ofthe-art solutions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The rotated dataset has 20,044 images for training and 19,808 for test (4 times as many as the original dataset).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the images in the AT&T database contain the facial contours and vary in pose as well scale, the face images in the Yale database have been cropped and aligned.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 37,
+                            "text": "AT&T database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 121,
+                            "end": 134,
+                            "text": "Yale database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When the clusters are tight, these data points represent a small spatial locality with respect to the rest of the data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These derived relevance assessments are shown in red in the table of To conclude, we create a new dataset containing intent hierarchies by manually grouping the official intents from TREC Web track test collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 183,
+                            "end": 214,
+                            "text": "TREC Web track test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Second, with the established DAVSOD dataset and previous 7 VSOD datasets , we present a comprehensive evaluation of 17 state-of-the-art models , making it the most complete VSOD benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 43,
+                            "text": "DAVSOD dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 59,
+                            "end": 72,
+                            "text": "VSOD datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 173,
+                            "end": 187,
+                            "text": "VSOD benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After a discussion of the spectral mapping problem in Section 4, the APASCI corpus used in the experiments and the characteristics of the acoustic data are described in Section 5.",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 82,
+                            "text": "APASCI corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A straightforward design of a distance function would simply generalize the standard L k metric to the case of uncertain data sets without using the uncertainty information.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "He has co-authored many publications in information retrieval evaluation, test collection building, text ltering, collaborative ltering, and intelligent software agents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ICCV'17 PoseTrack Challenge Dataset  is a large-scale benchmark for multi-person articulated tracking, which contains 250 video clips for training and 50 sequences of videos for validation.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 35,
+                            "text": "ICCV'17 PoseTrack Challenge Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform experiments on the Movielens 10M dataset which contains 10 million ratings from 69878 users on 10677 movies.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 51,
+                            "text": "Movielens 10M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As mentioned in Section 5.1.3, the comparison between FM and TFMAP is conducted on the Food dataset, due to the applicability of FM.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 99,
+                            "text": "Food dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Overview of SogouT-16 Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 29,
+                            "text": "SogouT-16 Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given the output of an algorithm over a set of images, we compute mask average precision (AP) using the definition and implementation from the COCO dataset  (for more detail see \u00a72.3).",
+                    "annotation_spans": [
+                        {
+                            "start": 143,
+                            "end": 155,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, on the test set, RuleNet's ability to generalize is 300% to 2000% better than the other systems (Table1).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe that TMSA outperforms all baseline methods on all datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection contains the full content of emails, various meta information as well as a achments, folders, calendar entries, and contact details from Outlook mailboxes for 279 company employees.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we check also the confidence on adversarial samples computed for the test set of the in-distribution dataset using 80 iterations of PGD with \u01eb = 0.3, stepsize 0.0075 for MNIST and \u01eb = 0.1, stepsize 0.0025 for the other datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 179,
+                            "end": 184,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We assume that the dataset satisfies the conditions of y i \u2264 1 and z i \u2264 1 for all",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Khresmoi survey has shown that the three most common sources of online information used by physicians (in decreasing order of usage) are: general search engines (e.g. Google, Bing, Yahoo!), medical research databases (e.g. Pubmed) and Wikipedia.",
+                    "annotation_spans": [
+                        {
+                            "start": 227,
+                            "end": 233,
+                            "text": "Pubmed",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We expect that our proposed interactive mode has an advantage over requesting prior knowledge from the outset, as it may be easier for the user to identify or recall relevant features while labeling documents in the collection and being presented with candidate features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our model outperforms the winning method of the Open-Images Relationship Detection Challenge by 4.7% (16.5% relatively) on the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "used the proprietary Google News corpus consisting of 6 billion words, while the 320-million-word training set used in  is a compilation of several Linguistic Data Consortium corpora, some of which available only to their subscribers.",
+                    "annotation_spans": [
+                        {
+                            "start": 148,
+                            "end": 182,
+                            "text": "Linguistic Data Consortium corpora",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 21,
+                            "end": 39,
+                            "text": "Google News corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first collection consists of publicly available real-  : Results on sparse, real-world graphs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We denote the post content matrix as C \u2208 R N\u00d7d , where d is the dimensionality of the posts, which is constructed by implementing Latent Dirichlet Allocation (LDA)  on the post corpus to discover 100 topics.",
+                    "annotation_spans": [
+                        {
+                            "start": 172,
+                            "end": 183,
+                            "text": "post corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Tables 1 and 2 report average precision on the S3DIS and Matterport3D datasets, showing that our approach outperforms all baselines.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 78,
+                            "text": "S3DIS and Matterport3D datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the supplement, we show that the nearest neighbor of generated images are not identical to images in the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Web-I dataset, the shared topics seem to characterize general information.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In CLEF17 dataset, 50 SRs are divided into two sets: 20 SRs as training set (for developing supervised models), and 30 SRs as test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 17,
+                            "text": "CLEF17 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We encode each of L words from a sentence x using the GloVe  pretrained on the Common-Crawl dataset, producing L 300-dim vectors, and use them as local features \u03a8(x)",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 99,
+                            "text": "Common-Crawl dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "combined a set of gradient features, using brightness, color and textons, to outperform the Canny edge detector on the Berkeley Benchmark (BSDS).",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 144,
+                            "text": "Berkeley Benchmark (BSDS)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "[33] contains 120,919 movie plot text summaries gathered from the Internet Movie Database.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 89,
+                            "text": "Internet Movie Database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To address our research questions we use the largest Twitter collection for reputation monitoring known to us, the RepLab2013",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 71,
+                            "text": "Twitter collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We significantly advance the state-of-the-art performances on public face anti-spoofing datasets, thus providing the community a promising direction along with building powerful anti-spoofing solutions in practice.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Particularly in the case of the Daily Mail dataset, highlights frequently have significant lexical overlap with passages in the accompanying article, which makes it easy for the word distance benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 50,
+                            "text": "Daily Mail dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, GSDT allows redundancy in covering the training data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on f ve TREC datasets of varied size and content indicate the proposed model signif cantly outperforms the optimized BM25 and DirichletLM in MAP over all datasets with all kernel functions, and excels the optimized BM25 and DirichletLM over most of the datasets in P@5 and P@20",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 33,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All other parameters remain the same as the experiments on the WIDER dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 76,
+                            "text": "WIDER dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent developments, like the ION-algorithm by Tillman et al. , have shown that it is possible to integrate multiple, partially overlapping data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is confirmed by the experiments: the performance on ESL (9 labels) is relatively bad, whereas the performance on the binary classification data sets is quite good.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments with TREC Ad-hoc and Legal Track datasets demonstrate that with high quality manual expansion, this diagnostic approach can reduce user effort by 33%, and produce simple and effective structured queries that surpass their bag of word counterparts.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 53,
+                            "text": "TREC Ad-hoc and Legal Track datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also use standard data augmentation which includes random crops for all datasets and random mirroring for CIFAR-10 and CIFAR-100.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 117,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 131,
+                            "text": "CIFAR-100",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the UCI data set was divided into 476 training and 119 test instances in five-fold cross validation, with four times as many advertisement as non-advertisement instances.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 25,
+                            "text": "UCI data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One is a homogeneous synthetic data set generated by the random walk expression,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test \"T\", number of times seen  summarises the number of times each document in the email collection was returned by each sampler, and the theoretical distribution described in Section 5.3.",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 100,
+                            "text": "email collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Nevertheless, due to the limited receptive field size of CNNs and the text representation forms, these methods fail to detect more complex scene text, especially the extremely long text and arbitrarily shaped text in datasets such as ICDAR2017-RCTW , SCUT-CTW1500 , Total-Text  and ICDAR2017-MLT .",
+                    "annotation_spans": [
+                        {
+                            "start": 234,
+                            "end": 248,
+                            "text": "ICDAR2017-RCTW",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 251,
+                            "end": 263,
+                            "text": "SCUT-CTW1500",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 266,
+                            "end": 276,
+                            "text": "Total-Text",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 282,
+                            "end": 295,
+                            "text": "ICDAR2017-MLT",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, samples in a dataset are not drawn randomly from the population, but from the people who have higher degrees than a bachelor's degree.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "* k for the TREC WT10G dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 30,
+                            "text": "TREC WT10G dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We simply consider here that \u03bbw is set to either the average number of occurrences of w in the collection, or to the average number of documents in which w occurs, that is:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Can the proposed task-consistency method be applied to other instructional video datasets?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the standard MNIST dataset , which represents each handwritten digit as a vector of length 784 (that is, an image of size 28 \u00d7 28).",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 33,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "in creating, curating and hosting the spam corpus is admirable, its usage comes with a few caveats.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While some of this research has shown improvement in retrieval effectiveness, studies of this type are hampered by a lack of test collections containing ambiguous queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, we obtain a number of memory banks equal to the number of different semantic classes found in the dataset, plus the background class, where patches belonging to the same class are placed together",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both datasets, incorporating more unlabeled data improves accuracy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, an appropriate method can always be selected based upon where the dataset lies in the gappiness spectrum.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe a complete framework for full-text indexing and querying in versioned document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to demonstrate the generalization ability of our dataset and metrics, deep stereo models are trained on FlyingThings3D , Cityscapes , and our Driv-ingStereo dataset respectively then compared.",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 127,
+                            "text": "FlyingThings3D",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 130,
+                            "end": 140,
+                            "text": "Cityscapes",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 151,
+                            "end": 173,
+                            "text": "Driv-ingStereo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For illustrative results, the instances were randomly divided into a training and a validation sets in roughly equal sizes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Second, as illustrated with all four measures on the la1 data set, some metric, angle in this case, suits the data set better than other metrics.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 65,
+                            "text": "la1 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We only report the results on MovieLens data set as the observations are similar when other data sets are used.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 48,
+                            "text": "MovieLens data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "created a large synthetic dataset to train end-to-end deep neural network for disparity estimation (e.g. DispNet) .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "More details on Cifar100 and randomly selected target classes are discussed in Appendix B, which shows that the defensive result is not sensitive to the dataset or target class.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The general CNC strategy described here starts with a set {P k } of k-partition clusterings for a given dataset over a specified range of k values.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A significant contribution of this work is that we report the first of these results using a comparatively large, publicly available test set, the CLEF 2006/7 CL-SR collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 175,
+                            "text": "CLEF 2006/7 CL-SR collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For digit datasets, we use LeNet  and train the model from scratch.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The relevant dimension for each simulated spike train was obtained as an average of 4 jackknife estimates computed by setting aside 1/4 of the data as a test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Mammography is a dataset of medical tests",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 11,
+                            "text": "Mammography",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each small data set, we created several profiles, one profile for each node in a sub-tree, to simulate multiple users, each with a related, yet separate definition of relevance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Previous best result on Stanford Cars fine-tuned weights originally trained on a larger dataset , whereas we use a randomly initialized network.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 48,
+                            "text": "Stanford Cars fine-tuned",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our camera is fixed on the windscreen and doesn't fully satisfy the requirements given by the Ackermann motion model (i.e. position on top of the back wheel axis), but-as proven in -the restrictive model is still applicable if the rotation angle \u03b8 between two camera poses is sufficiently small, which is the case in our datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The operators are trained on two different Web collections: GOV2, TREC's Terabyte track collection , and a subsample of the Web from 2011.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 64,
+                            "text": "GOV2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 66,
+                            "end": 98,
+                            "text": "TREC's Terabyte track collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, test collections only exist for a limited number of genres, data types, and search tasks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments, superior performances were achieved by simply setting the same values of \u03b1, \u03b2, and \u03b3 for all data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Amongst these tools, latent variable models prove to be particularly useful for analyzing such data sets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that, by exploiting the inherent geometry of the dataset-namely, that it consists of pairwise comparisons-the greedy algorithm's complexity can be reduced to O(N 2",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, since the data sets used have very different characteristics (e.g. the average query length for MQ2007 is 40 and for MSLR-WEB10K -120), the experiment suggests that the algorithm achieves good performance for a wide range of input problems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We make an open dataset that includes more than 17K images captured by the DAVIS camera to learn a generic model for event-to-image/video translation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the CIFAR-10 dataset, SparseFool still manages to find very sparse perturbations, but less so than the \"one-pixel attack\" in this case.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It can handle moderate-size datasets, but cannot scale to large-scale datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On TREC2003 data set, our algorithms are comparable or slightly better than RankSVM, but RankBoost is much worse while on TREC2004",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 20,
+                            "text": "TREC2003 data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 130,
+                            "text": "TREC2004",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset contains various pop-out patterns where a target stands out from the rest of the items in terms of color, orientation, density, curvature, etc.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further validated the proposed method on three real datasets: 1) the Movielens-100k dataset 3 , containing the ratings of different users to different movies 2)",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 94,
+                            "text": "Movielens-100k dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the case of the Citation data set, the backward algorithm did not perform as well, though it will still superior to the static algorithm for a portion of the X-axis range.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 37,
+                            "text": "Citation data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The tutorial is augmented with a live demo where participants have a chance to implement a click model and to test it on a publicly available dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Exceptionally, only on the AWA dataset, where there is a significant imbalance among training classes, we additionally synthesize examples for the seen classes to obtain equivalent number of training  examples for the seen classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 38,
+                            "text": "AWA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This behavior can be observed on dataset 0046, which does not contain sharp turns.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On both Micro-F1 and Macro-F1, the proposed TerseSVM method performs better than all the baselines and state-of-the-art HR-SVM method on all datasets, except the NEWS20.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 168,
+                            "text": "NEWS20",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the clarity of explanation, consider a dataset with one-dimension signals.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We adopt the same evaluation procedure as the KITTI benchmark and only consider the detections within the front 90 \u2022 field of view of the sensor and up to 70 meters.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 61,
+                            "text": "KITTI benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate this by a comparison of classification performance with different sets of features learned on the MNIST dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 126,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The retrieval quality measures we use to report results on the Web data sets in our study are Non-Discounted Cumulative Gain (NDCG) and",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the last decades the IR community has branched out significantly in a variety of domains and applications, with the creation of specific IR test collections focusing on specific problems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reported metrics are averaged across the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider the task of multi-label classification on Bibtex dataset with 159 labels and 1839 input variables and Bookmarks dataset with 208 labels and 2150 input variables.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 68,
+                            "text": "Bibtex dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 131,
+                            "text": "Bookmarks dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to get a quantitative idea of the magnitudes of these effects, the method proposed here was applied to the Accidents data set .",
+                    "annotation_spans": [
+                        {
+                            "start": 116,
+                            "end": 134,
+                            "text": "Accidents data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The procedure is repeated for all records in the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experimental results on the MegaFace Challenge I, IARPA Janus Benchmark A (IJB-A), Youtube Faces (YTF) and Labeled Faces in the Wild (LFW) datasets show the effectiveness of the proposed uniform loss.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 157,
+                            "text": "MegaFace Challenge I, IARPA Janus Benchmark A (IJB-A), Youtube Faces (YTF) and Labeled Faces in the Wild (LFW) datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the idea on three large datasets of object and scene categories, and show its clear advantages over status quo models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focus attention on the Graph500 benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 44,
+                            "text": "Graph500 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The choice of this test collection is due to the availability of manually specified domain for each query.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Second, the new loss leads to better solutions on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All the proposed methods presented in this paper can be straightforwardly adapted to a cross-validation setup.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The KC4 dataset has been taken from the NASA data metrics program (http://mdp.ivv.nasa.gov/).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "KC4 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this work we perform our analysis and experiments on a large document collection taken from Yahoo Answers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The mean loss achieved over 1000 pairs of content and style images taken from the 2014 MS-COCO validation set also improves when these parameters increase.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 109,
+                            "text": "2014 MS-COCO validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The causal accuracy is 0.033 and 0.030 on the dataset with unknown confounders, and 0.039 and 0.033 on the dataset without unknown confounders.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each instance in the normalized data set contains 85 time points.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We carry out a nal experiment on the CQA-QL-2015 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 55,
+                            "text": "CQA-QL-2015 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Prediction results for quality ratings by individual users in CEED2016 database (randomly removed ratings, 100 repetitions).",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 79,
+                            "text": "CEED2016 database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the test set, PFMs with 100, 500, 1O(}(} and 3000 predictive contexts achieved an NNL of 0.622, 0.518, 0.510 and 0.435.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we extract a small social graph from the last.fm dataset as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 62,
+                            "text": "last.fm dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To verify this, we follow  and use the complete Flickr1M dataset to evaluate the consumptions of training time and memory.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 64,
+                            "text": "Flickr1M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Even though Res-cGAN produces high-quality colorization in most cases, it fails to preserve the groundtruth color of rare instances or completely fails in one-shot learning settings (e.g., results on the Pokemon dataset).",
+                    "annotation_spans": [
+                        {
+                            "start": 204,
+                            "end": 219,
+                            "text": "Pokemon dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A naive implementation of the optimization of L over {m, V } is prohibitively costly for data sets with large T , as the posterior covariance V has O((dT ) 2 ) elements and has to be optimized over the set of semi-definite matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Along with the data set, the results of some previous classification methods  on it are also provided.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Word embedding based ranking, AES, shows good potential to be a good performer, at least on the 30 SR dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 109,
+                            "text": "30 SR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training is performed on the Liberty set of PhotoTourism dataset for all descriptors and with identical setup.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 68,
+                            "text": "PhotoTourism dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 33,
+                            "end": 44,
+                            "text": "Liberty set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first consider the synthetic Tosca dataset , and separately analyze the Centaur (six poses) and Horse (eight poses) meshes.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 45,
+                            "text": "Tosca dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The classification results on Yeast data set are shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 44,
+                            "text": "Yeast data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Namely, collections of webpages are typically organized into websites whose contents are topically related.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our training and test datasets are prepared based on three folds of methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We complement our theoretical results by an empirical study, in which we compare the different approaches on the SUSHI data set, 1 a standard benchmark for preference learning.",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 127,
+                            "text": "SUSHI data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results show that, with sparsity-inducing penalization, our proposed model is able to improve the prediction performance of the parametric censored regression on the highdimensional datasets for different kinds of distributions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this experiment, we tested the effectiveness of GRLSI and GNMF by using the topics generated by them with the Web-I dataset in search relevance ranking on the Web-II dataset  .",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 176,
+                            "text": "Web-II dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 113,
+                            "end": 126,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, a large range of experimental statistics from four representative modern convolutional networks (i.e., PreResNet, ResNeXt, DenseNet, Wide ResNet) on CIFAR10/100 datasets verified our findings.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 182,
+                            "text": "CIFAR10/100 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GLUE is a collection of nine language understanding tasks built on existing public datasets, together with private test data, an evaluation server, a single-number target metric, and an accompanying expertconstructed diagnostic set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Three were exploratory information problems (EX) and were created based on topics used in the TREC Aquaint collection (although results were from the Web, not the Aquaint corpus).",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 117,
+                            "text": "TREC Aquaint collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 163,
+                            "end": 177,
+                            "text": "Aquaint corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Upon completion of this tutorial, attendees will: \u2022 be familiar with the history of the test collection evaluation paradigm; \u2022 understand the process of beginning from a concrete user task and abstracting that to a test collection design; \u2022 understand di erent ways of establishing a document collection; \u2022 understand the process of topic development; \u2022 understand how to operationalize the notion of relevance, and be familiar with issues surrounding elicitation of relevance judgments; \u2022 understand the pooling methodologies for sampling documents for labeling, and be familiar with sampling strategies for reducing e ort; \u2022 be familiar with procedures for measuring and validating a test collection; and \u2022 be familiar with current research issues in this area.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is well known that deep convolutional networks trained on large datasets provide a rich hierarchical representation of images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While some tasks ( ) and some linguistic phenomena  in Appendix B) measured in GLUE remain difficult, the current state of the art GLUE Score as of early July 2019 (88.4 from  surpasses human performance (87.1 from    : GLUE benchmark performance for submitted systems, rescaled to set human performance to 1.0, shown as a single number score, and broken down into the nine constituent task performances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As evident from (b), in most cases (except for the ClueWeb-B collection)",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 71,
+                            "text": "ClueWeb-B collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "LL+Sem and LL+Rel perform better than LL in terms of MAP and P@10, in all collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Kasthuri dataset is freely available online 1 and rep-1 https://neurodata.io/data/kasthuri15/ resents a region of the neocortex imaged by a scanning electron microscope (SEM).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "Kasthuri dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, when evaluated on the popular instance segmentation benchmark, the PASCAL VOC 2012 dataset , our method achieves substantially better performance than the state-of-the-art box-level instance segmentation method .",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 99,
+                            "text": "PASCAL VOC 2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It turns out that the web hosts in this collection are clustered in the top 18 positions on \"Number of words\", top 22 on \"Top 100 corpus precision\" and top 45 on \"Independent LH\".",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To this end, we considered queries from our validation set in which Matched ranked the clicked document for the query in one of the top 3 positions while Full ranked it far below.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, there is a gap between these datasets in resolutions, image quality, and pose distributions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These algorithms have shown impressive results on very large corpora like Wikipedia articles, where it is not even feasible to store the whole dataset in memory.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 92,
+                            "text": "Wikipedia articles",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this reason, we use another dataset, Food dataset , which has also been used in the work on FMs .",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 53,
+                            "text": "Food dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides, the resulting CLQS is tested with French to English CLIR tasks on TREC collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 91,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both networks are pre-trained on the ILSVRC2012 dataset , and fine-tuned in our training set for 40 epochs using batch size 256.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 55,
+                            "text": "ILSVRC2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we show in figure 2 the evolutions on the COIL 2 , DIGIT and USPS data sets of the classification and both risks of the Gibbs classifier (on the labeled and unlabeled training sets) for different number of rounds in the SLA",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 88,
+                            "text": "COIL 2 , DIGIT and USPS data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Even when the data collection is conducted with the greatest of care, the data is still replete with non-mouse vocalization sounds, such as the mice interacting with the feeding apparatus, miscellaneous sounds from the lab (doors slamming, human speech, etc.), and electronic noise in the recording equipment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments, we report the evaluation of 35 context extraction combinations on a collection containing 20 news articles from Yahoo!",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To further contribute the community a complete benchmark, we systematically assess 17 representative VSOD algorithms over seven existing VSOD datasets and our DAVSOD with totally \u223c84K frames (largest-scale).",
+                    "annotation_spans": [
+                        {
+                            "start": 137,
+                            "end": 150,
+                            "text": "VSOD datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first train a VGG-16 network on in-distribution data (STL-10) and then predict on classes sampled from the ImageNet database that have not been encountered during training.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 63,
+                            "text": "STL-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 110,
+                            "end": 127,
+                            "text": "ImageNet database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also harvested news articles from the Slashdot website and created 2 new datasets: (i) slash-7:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Nw N (Nw and N denote the number of documents in the collection that contain w and the total number of documents in the collection, respectively), and t(w, D) =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second dataset is the CAT2000  which is composed of 2000 images from different categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 33,
+                            "text": "CAT2000",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All measurements of prediction performance are with respect to the 50K validation images from the ImageNet12 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 116,
+                            "text": "ImageNet12 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We indexed 786126 images extracted from the MIRFLICKR-1M image collection, which has 1 million Flickr images.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 73,
+                            "text": "MIRFLICKR-1M image collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Several labeling approaches attempt to enrich contentonly terms by exploiting external resources for labeling, for example, the WordNet lexical database  was used to extract root meanings of important terms and to determine semantic relationships among these terms.",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 152,
+                            "text": "WordNet lexical database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While we use similar building blocks to these methods (encoder-decoder networks, U-nets, adversarial loss, and perceptual loss), our networks can generalize to arbitrary images, and are trained on large scale indoor and outdoor SfM datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 228,
+                            "end": 240,
+                            "text": "SfM datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This coupled model also captures the variance profile of the dataset which is accounted for by spike timing jitter.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the DBLP dataset Puzis et al.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 20,
+                            "text": "DBLP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we plan to perform these experiments on a larger set of queries, and on other existing collections, such as the CLEF2017 TAR collection .",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 144,
+                            "text": "CLEF2017 TAR collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present the datasets we use to evaluate our models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The EMNIST dataset 2 is used as the unlabeled dataset, which contains 814K hand-written letters and digits.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "EMNIST dataset 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, they are lossless, because it is possible to derive the identity and the support of every frequent itemset in the collection from them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and the 50 SRs in the entire dataset of CLEF17, as two sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report the mean accuracy (%) of 600 randomly generated episodes as well as the 95% confidence intervals on test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Segmentation Quality:  shows the segmentation benchmark accuracy of our closest ultrametric algorithm (denoted UM) along with the baseline ultrametric contour maps algorithm (UCM) with and without length weighting .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The sample AP data set from , which is sampled from a subset of the TREC AP corpus contains D= 2,246 documents with a vocabulary size V = 10,473 unique terms.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 22,
+                            "text": "AP data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 68,
+                            "end": 82,
+                            "text": "TREC AP corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on three TREC newswire and web collections demonstrate that the proposed modifications significantly outperform the baselines, in all cases.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 54,
+                            "text": "TREC newswire and web collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition to ObMan, to which we refer as (a) in , we render 20K images for two additional synthetic datasets, (b) and (c), which leverage information from the training split of HIC (d).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To analyze the proposed Gaussian-based pooling methods ) from various aspects, we embed them in the pool1&2 layers of the 13-layer network ) on the Cifar100 dataset  which contains 50,000 training images of 32 \u00d7 32 pixels and 10,000 validation images of 100 object categories; the network is optimized by SGD with a batch size of 100, weight decay of 0.0005, momentum of 0.9 and the learning rate which is initially set to 0.1 and then divided by 10 at the 80th and 120th epochs over 160 training epochs, and all images are pre-processed by standardization (0-mean and 1-std) and for data augmentation, training images are subject to random horizontal flipping and cropping through 4-pixel padding.",
+                    "annotation_spans": [
+                        {
+                            "start": 148,
+                            "end": 164,
+                            "text": "Cifar100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Mix data set is to test whether an algorithm can detect arbitrary shaped clusters in a noisy data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "Mix data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Through extensive experiments based on an editorially judged CQA dataset, we show that our supervised learning method of content-agnostic vote calibration can significantly improve the performance of answer ranking and expert ranking.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the dataset was obtained by scraping cooking websites, the resulting recipes are highly unstructured and contain frequently redundant or very narrowly defined cooking ingredients (e.g. olive oil, virgin olive oil and spanish olive oil are separate ingredients).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For binary classification, we use the logistic likelihood and experiment with the Wisconsin breast cancer dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 113,
+                            "text": "Wisconsin breast cancer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this way we obtain a much smaller initial dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(2) Task \u2022 a task-centered approach to conceiving test collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the 50 Salads-mid dataset to perform model analysis, because it has more fine-grained action types and longer video recordings than the other mentioned datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 32,
+                            "text": "50 Salads-mid dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed method has been validated on both simulated and real-world datasets, where it shows higher preference aggregation ability than the state-of-the-art methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unfortunately, real world datasets may be huge, so that these algorithms cannot store all the data in main memory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 3, we propose the factor graph model to address the problem, and in Section 4, we conduct the experiments on the Twitter data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 140,
+                            "text": "Twitter data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on real datasets to show how our method performs in practical settings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, on the LFW Dataset, the unsupervised model obtains about 92% of accuracy and 79% IoU and the supervised model needs 50-60 labeled examples to reach similar performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 32,
+                            "text": "LFW Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In a market basket data all 'events' (purchases) in the dataset happen in the same time and we have multiple observations of the same variables (the set of all available goods).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "WikiQA dataset is an open-domain factoid answer selection benchmark, in which the standard pre-processing steps as  is employed to extract questions with correct answers.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 14,
+                            "text": "WikiQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our network outperforms ES-PNet by 4-5% and has 2 \u2212 4\u00d7 fewer FLOPs on the PASCAL VOC and the Cityscapes dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 111,
+                            "text": "PASCAL VOC and the Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Computationally, conditional reshaping is more tractable than identifying latent domains from the training datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Symmetrically, P S (y i ) is the collection of top-m most probable translations of the source language w.r.t.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When an existing test collection is to be analyzed, we can also upload the set of relevance assessments, which will be used to visualize the process of assessment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Deblurring accuracy comparison on the dataset from .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use street-scene videos from three different geographical areas: 1) Germany, from the Cityscapes dataset , 2) Boston, collected using a dashcam, and 3) NYC, collected by a different dashcam.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 107,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental Results on Public Data Sets..",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is the number of sentences in the Interest Corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 49,
+                            "text": "Interest Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The video summarization datasets  contain long videos arranging from different domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While we motivated this objective with notions of class balance and seperation, our approach may be interpreted as learning a conditional distribution for y that preserves information from the data set, subject to a complexity penalty.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TDT2 data set with k = 5, third row:",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 13,
+                            "text": "TDT2 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Like Biography.com, we include the most relevant biography we can find in the Interest Corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 93,
+                            "text": "Interest Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate qualitative results on real captured hands, and report quantitative evaluations on the FAUST benchmark for body registration.",
+                    "annotation_spans": [
+                        {
+                            "start": 102,
+                            "end": 117,
+                            "text": "FAUST benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Data used in this work were obtained from the ADNI database (adni.loni.ucla.edu).",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 59,
+                            "text": "ADNI database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Denote P T (x i ) as the collection of top-m most probable translations of the target language w.r.t.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1) on the whole TIPSTER collection",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 35,
+                            "text": "TIPSTER collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the transfer learning performance of models initialized by different task-specific pre-trained models from Taskonomy dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 118,
+                            "end": 135,
+                            "text": "Taskonomy dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Scaling on Real-World Datasets Both CPALS and our implementation of GigaTensor are uniprocessor codes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, big data matrices over time also arise in several other real-life applications such as stock market monitoring where a business sector is characterized by the ups and downs of each of its companies per year or topic monitoring of document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared with performances obtained in the Wiki dataset, all methods achieve better results when using CNN visual features in spite of the image query or the text query.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 55,
+                            "text": "Wiki dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also report segmentation accuracies on the PASCAL VOC 2012 test set (1,456 images) and the PASCAL Context validation set (5,105 images) for comparisons with state-of-the-art methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 70,
+                            "text": "PASCAL VOC 2012 test set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 94,
+                            "end": 123,
+                            "text": "PASCAL Context validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We verify the effectiveness of the proposed method on three synthetic noisy datasets, i.e., MNIST , CIFAR-10 , and CIFAR-100 , and one real-world noisy dataset, i.e., clothing1M : Means and standard deviations (percentage) of classification accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 92,
+                            "end": 97,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 100,
+                            "end": 108,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 115,
+                            "end": 124,
+                            "text": "CIFAR-100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 167,
+                            "end": 177,
+                            "text": "clothing1M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report the average results of 10-fold cross validation on the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The LExR test collection comprises metadata records for 11,942,014 scientific publications associated with 206,697 candidate experts from all areas of knowledge working in multiple Brazilian research institutions spread all over the country.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 24,
+                            "text": "LExR test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments demonstrate the effectiveness of the proposed CAG-Net model on the challenging largescale VG dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 122,
+                            "text": "VG dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Details on all the algorithms and datasets used, as well as additional results and figures, can be found in Appendix B of the supplement.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The KD1,KD2 data sets were obtained from the KDDCup data set by discretizing the continuous attributes into 10 and 100 bins respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "KD1,KD2 data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 44,
+                            "end": 60,
+                            "text": " KDDCup data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When searching a temporal document collection, e.g., news archives or blogs, the time dimension must be explicitly incorporated into a retrieval model in order to improve relevance ranking.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On a plethora of datasets , models with multi-modal attention , deep net architecture developments  and memory nets  have been investigated.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The GLUE benchmark, introduced a little over one year ago, offers a single-number metric that summarizes progress on a diverse set of such tasks, but performance on the benchmark has recently surpassed the level of non-expert humans, suggesting limited headroom for further research.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "GLUE benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consider now that from D, we can build two projected datasets D [A,B)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As depicted by the Document-Label-Overlap analysis, the HTML crawled documents most likely contain a broader range of terms compared to the title and description of the original ODP dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 178,
+                            "end": 189,
+                            "text": "ODP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We find that on the synthetic dataset, decreased penetration is systematically traded for simulation instability whereas for FHB C increasing \u03bb R from 0 to 0.5 decreases depth penetration without affecting the simulation stability.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two KUKA datasets consist of rhythmic motions at various speeds, and represent a more realistic setting in robotics: While one can collect some data for offline training, it is not feasible to cover the whole state-space.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Embedded methods  \"embed\" feature selection as a part of the learning process without spiting the data into training and testing datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "imation based CF methods on the MovieLens (10M) and Netflix datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 68,
+                            "text": "MovieLens (10M) and Netflix datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To show the impact of the joint learning framework with our proposed DAL method, we conduct the ablative evaluations on several public AIFR datasets including FG-NET, MORPH Album 2 and CACD-VS.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 148,
+                            "text": "AIFR datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 159,
+                            "end": 165,
+                            "text": "FG-NET",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 167,
+                            "end": 180,
+                            "text": "MORPH Album 2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 185,
+                            "end": 192,
+                            "text": "CACD-VS",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The image collections are typical real world personal image collections as they, for the most part, were taken during holiday trips.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Particularly, for heterogenous data sets, the resulting models are overly-complex with poor model interpretability and predictive accuracy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments, we also empirically confirmed our hypothesis for approximation via a permutation test on two large review data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar to , we empirically verify this hypothesis by training the PixelCNN model on the CIFAR10 dataset and then use its log-likelihood estimate combined with a p-value test to detect if an input image is from the original training/testing distribution or from the low probability density adversarial space.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 104,
+                            "text": "CIFAR10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We designed a single neural network as fully convolutional structure for all three datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results for both CiteSeer and the other two datasets, Cora and Nova, are shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 29,
+                            "text": "CiteSeer",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 58,
+                            "end": 62,
+                            "text": "Cora",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 67,
+                            "end": 71,
+                            "text": "Nova",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "5-fold cross validation is carried out on the four ad hoc test collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When we trained on one dataset and tested with another, we made sure that the datasets share no common cameras.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Considering a collection of such models, oracle inequalities guaranteeing the quasi-optimality of the rule minimizing the penalized empirical risk can be then classically established by means of a slight modification of the argument of Theorem 2's proof, see e.g. Chapter 18 in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second dataset was the \"Enron\" dataset, which contains e-mail transactions among the employees of Enron.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 42,
+                            "text": "\"Enron\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both the Microso and Istella datasets use integer relevance labels in the range .",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 37,
+                            "text": "Microso and Istella datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the UCF101-24 dataset, we exploited the bounding box for the action detection provided in ,  : The top3 negative class accuracy on the Olympic Sports dataset and the UCF101-24 dataset averaged over 3 negative classes whose prediction probability is the largest, by changing the number of fully-connected layers.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 25,
+                            "text": "UCF101-24 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 170,
+                            "end": 187,
+                            "text": "UCF101-24 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 139,
+                            "end": 161,
+                            "text": "Olympic Sports dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The primitives generalise to other characters if the training dataset contained sufficient variation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We found various reasons for researchers to repeat the data collection step: getting a representative sample (I4: \"so that [first analysis] was followed by collecting new data, using the methods identified in the literature and guided by insights from the earlier analysis\"), getting context for a certain view",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We construct a citation worthiness dataset using the articles of ACL Anthology Reference Corpus (ARC).",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 101,
+                            "text": "ACL Anthology Reference Corpus (ARC)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For LSUN and Places365 datasets, we see that many times distillation methods performs better than SPV.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 31,
+                            "text": "LSUN and Places365 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each N , 20 independent simulated datasets are generated and the averaged results are plotted.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first two datasets, CiteSeer and Cora, are available at http://www.cs.umd.edu/projects/linqs/projects/lbc/.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 32,
+                            "text": "CiteSeer",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 37,
+                            "end": 41,
+                            "text": "Cora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested our approaches on the benchmark datasets from the SemEval-2016 task 3 on cQA , which we enriched with Arabic new questions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Second, they aid us in balancing the dataset distribution, mitigating its question-conditional priors and guarding against educated guesses (section 3.4).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(i) a DCT expansion; (ii) learning of D using the beta process construction, using training images; (iii) using the beta process to perform joint CS inversion and learning of D. For (ii), the training data consisted of 4000 8 \u00d7 8 patches chosen at random from 100 images selected from the Microsoft database (http://research.microsoft.com/en-us/projects/objectclassrecognition).",
+                    "annotation_spans": [
+                        {
+                            "start": 289,
+                            "end": 307,
+                            "text": "Microsoft database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the performance of RPMF over different contexts, we design three experiments using the \"MovieLens-100K\" dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 124,
+                            "text": "\"MovieLens-100K\" dataset.",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In a recent study using the Avocado collection, Sappelli et al.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 46,
+                            "text": "Avocado collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To validate various design choices of ours, we conduct ablation study on the ScanNet (v2) validation set and discuss the details below.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 104,
+                            "text": "ScanNet (v2) validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly to previous work, for the Jester dataset we randomly select 4000 users and randomly withhold 2 ratings for each user for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 50,
+                            "text": "Jester dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can see that there are a large proportion of terms occur in less than 10 documents in all the three data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Considering the best baseline in each scenario and dataset, we find that our heuristics produce gains in P @5",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TraPHIC outperforms prior methods on dense traffic datasets with 10-30 road agents by 0.78 meters on the root mean square error (RMSE) metric, which is a 30% improvement over prior methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the PCE breast cancer data set, we can assess the accuracy of several prediction models using the most powerful of the predictor variables available in the data set (See ).",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 36,
+                            "text": "PCE breast cancer data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "EU dataset : This graph represents email data from a European research institute.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 10,
+                            "text": "EU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On benchmark dataset released by CLEF'17 eHealth Task 2, we show that the proposed SDR outperforms stateof-the-art solutions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The DukeMTMC-reID dataset contains 1,404 identities, 16,522 training images, 2,228 queries, and 17,661 gallery images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "DukeMTMC-reID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With promising results on AlexNet, we also looked at a larger, more recent network, VGG-16 , on the same ILSVRC-2012 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 124,
+                            "text": "ILSVRC-2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, as far as we know, there is no state-of-the-art approaches that use the benchmark datasets described in the previous section in the same way.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This section details our multi-layer SSL algorithm, extended from , targeting at large-scale datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As illustrated in , our BiC splits the stored exemplars from the old classes into a training set (train old ) and a validation set (val old ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first experiment is gender classification using features located on 58 annotated landmark points in the IMM face data set   ).",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 125,
+                            "text": "IMM face data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the lattice of frequent itemsets derived from the simple dataset reported in , mined with Downloaded 03/24/20 to 82.173.143.206.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Magnatagatune is a collection of approximately 21,000 music clips, each annotated with a combination of 188 different tags.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 13,
+                            "text": "Magnatagatune",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are three main steps to our approach: (1) learning a weakly-supervised region proposal network (W-RPN) on video collection V ;",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We empirically evaluate the ABC metric over MNIST and ImageNet datasets using (a) out-ofdistribution data, (b) adversarial inputs generated using digital attacks such as FGSM, PGD, CW and DeepFool, and (c) physically-realizable adversarial patches and LaVAN attacks.   , or relying on topological and manifold-based data analysis .",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 71,
+                            "text": "MNIST and ImageNet datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With our ChatNoir search engine we provide the second public API for reproducibly searching the ClueWeb09 corpus in TREC style experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 112,
+                            "text": "ClueWeb09 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the collection contained many botanical terminologies, it was more difficult for users to find the appropriate search terms by themselves.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We chose the ResNet-50  as our CNN backbone model which is pre-trained on the ImageNet dataset , and we removed the last fully-connected layer and the stride of last residual block is set to 1.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 94,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The neighborhoods for the two-syllable data set were thus examined.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, we obtain datasets of the form G-X, where G is any of AS, Grid, Geo and BA and X is any of the Ego, Uniform, Direct, and Inverse.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this model, the weight of a query term is calculated on the basis of the hypothesis that the more divergence there is between the within-document term-frequency and the term's frequency within the collection, the more the information is carried by the term in the document.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For testing on KITTI datasets, we first pad zeros on the top and the right side of the images to make the inputs in size 1248\u00d7384.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 29,
+                            "text": "KITTI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Musk 1 and Musk 2 data sets  are two biochemical tasks which directly promoted the research of multi-instance learning.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 27,
+                            "text": "Musk 1 and Musk 2 data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Wikipedia dataset, documents are associated with labels representing the categories of them.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 24,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, we detail the dataset, baseline methods, and evaluation metrics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We employed three benchmark point cloud datasets for experimental evaluation, which included ModelNet40 , ScanNet  and ShapeNet .",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 103,
+                            "text": "ModelNet40",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 106,
+                            "end": 113,
+                            "text": "ScanNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 127,
+                            "text": "ShapeNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This section presents qualitative and quantitative evaluation of our segmentation on 200 images from the benchmark Berkeley segmentation dataset (BSD) .",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 150,
+                            "text": "Berkeley segmentation dataset (BSD)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The top two clusters found on the Gene dataset with \u03b3 =",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 46,
+                            "text": "Gene dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments conducted over three large-scale real datasets indicate the proposed approach outperforms the compared methods for item recommendation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compared EASal with respect to other saliency models on the two aforementioned benchmark datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We examine two state-of-the-art generative models, StyleGAN  trained with the FFHQ dataset, and BigGAN  trained on ImageNet .",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 90,
+                            "text": "FFHQ dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 115,
+                            "end": 123,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We want to emphasize that since the CRU dataset has a high resolution and the underlying 3-dimensional grid has 7,146,520 nodes (67,420 nodes per year \u00d7 106 years), the drought detection problem is of large scale.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 47,
+                            "text": "CRU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We follow the conventional practice of LETOR research to label the dataset in an ordinal, integer scale .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In their work, they proposed a dataset with 6 image categories and 63,305 images for network dissection, where each image is labeled with pixel-wise semantic meaning.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Training cases were generated on the fly so the effective training set size was 50000 (the size of the MNIST training set)",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 121,
+                            "text": "MNIST training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Heterogeneous Datasets Representation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our model has been tested on several test collections in both English and Chinese.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This behavior can be observed on dataset 0046, which does not contain sharp turns.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the classic rotating-Teapot dataset, where the target output values are the cosine of the rotation angles.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 42,
+                            "text": "rotating-Teapot dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two-sample test is a statistical test whose goal is to detect the difference between two data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset includes some annotations on relation evaluation between a state and s-NE.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When ground-truth 3D hand joint annotations are available (e.g STEREO dataset), this loss minimises the distance between the latter and the 3D hand joints generated by the hand model: where x 3D is a vector containing the ground-truth 3D hand joint coordinates.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 77,
+                            "text": "STEREO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The analogous priors are used for both other datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluating the efficacy of the proposed NetFS framework on real-world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "compares the relative order between LSR and JSR for the top K results on 6 real datasets (K = 50, 200, 500).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To support our tricks is transferable to other dataset, we train a ResNet-50-D model on MIT Places365 dataset with and without the refinements.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 109,
+                            "text": "MIT Places365 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the practical feasibility of the presented techniques on four real-world datasets whose size mandates fast graph kernel computation; two datasets of molecular compounds (MUTAG and PTC), and two datasets with hundreds of graphs describing protein tertiary structure (Protein and Enzyme).",
+                    "annotation_spans": [
+                        {
+                            "start": 180,
+                            "end": 185,
+                            "text": "MUTAG",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 190,
+                            "end": 193,
+                            "text": "PTC",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 276,
+                            "end": 283,
+                            "text": "Protein",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 288,
+                            "end": 294,
+                            "text": "Enzyme",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To the best of our knowledge, there is no publicly accessible dataset that contains both metadata and user demographic information.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The statistics of the cleaned dataset is given in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We create the first group of datasets by referring to , where many real-world scenes are included.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Cj phoneme representation vectors directly from the input data set, and without any learning or a map structure at all, establish distributed representations on these vectors with the SARDNET algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From  we can observe that McWL achieves the best (or comparable to the best) performance on several datasets across four evaluation metrics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we propose improved techniques for positional indexing in versioned document collections, building on the approach described in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SCUT-CTW1500  is a challenging dataset for curved text detection.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "SCUT-CTW1500",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(b) Given many patient records, a diagnostic record dataset takes the following form. \"",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example,  shows the updated database D after modification of sequence 0 in the example database D. CISpan will model this update to a removal of sequence 0 in D and an insertion of sequence 0 in D .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the MovieLens 1 and MovieRating 2 datasets for the empirical analysis of our approaches.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 49,
+                            "text": "MovieLens 1 and MovieRating 2 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The network forward runtime is averaged over Matterport3D test set with input images of size 320\u00d7256 on NVIDIA GeForce GTX TITAN X GPU.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 66,
+                            "text": "Matterport3D test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct word similarity evaluation on the following benchmark datasets: WordSim353 , MEN  and SimLex999 .",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 85,
+                            "text": "WordSim353",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 88,
+                            "end": 91,
+                            "text": "MEN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 97,
+                            "end": 106,
+                            "text": "SimLex999",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on five multi-view multi-label datasets show that the proposed McWL achieves superior performance against state-ofthe-art approaches across various evaluation criteria.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": A multiple query experiment on a 60% sample of the NYT10 dataset yields the depicted total running time for the three approaches.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 66,
+                            "text": "NYT10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, the aforementioned training data refinement is not required for simulated datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In experiments, we demonstrate the state-of-the-art visual recognition performance on MIR-Flickr database and PASCAL VOC 2007",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 125,
+                            "text": "PASCAL VOC 2007",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 105,
+                            "text": "MIR-Flickr database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the LUCS/KDD dataset repository we take some of the largest and most dense databases.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 25,
+                            "text": "LUCS/KDD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our findings show that minimizing the bias across the document collection leads to good performance (though not the best performance possible).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first locate a strong bias in the classifier layer (the last fully connected layer), and then apply a linear model to correct the bias using a small validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we experimentally examine and discuss the practical impact that the additional protection from transfer risk allowed in generalized IDS games has on MSNE by solving several randomly-generated instances of SC+SS-type games with graph structures taken from several real-world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we describe this machine learning approach and evaluate the accuracy of AtlasifySR+E's SR estimates against benchmark SR data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 147,
+                            "text": "SR data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to our statistics on three million sessions from the Sina Weibo Conversation Corpus, there are over 25 million unique words and",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 93,
+                            "text": "Sina Weibo Conversation Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training details for each dataset are listed as follows:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Formally, we need to build predictions of f k (t) for t = T l+1 , \u00b7 \u00b7 \u00b7 T , from the knowledge of the entire fMRI dataset X , and the partial knowledge of f k (t) for the training time samples t = 1, \u00b7 \u00b7 \u00b7 , T l (see ).",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 121,
+                            "text": "fMRI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiment, we applied MSD, MusiXmatch and Last.fm tag datasets to extract features, as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 70,
+                            "text": "MSD, MusiXmatch and Last.fm tag datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recall that the BaiduQA dataset was preprocessed with Chinese word segmentation by the dataset provider .",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 31,
+                            "text": "BaiduQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For testing, we adopt 5 standard benchmark datasets: Set5, Set14, BSD100, Urban100 and Manga109, each of which has different characteristics.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 57,
+                            "text": "Set5",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 59,
+                            "end": 64,
+                            "text": "Set14",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 66,
+                            "end": 72,
+                            "text": "BSD100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 74,
+                            "end": 82,
+                            "text": "Urban100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 87,
+                            "end": 95,
+                            "text": "Manga109",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The scenes in our dataset include busy streets of metropolitan areas and crowded public parks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The primary difference is that our computational emphasis is not on the typically small, homogenous benchmark data sets in the statistical and machine learning literature ( , ), but on massive (outof-core), heterogenous training data sets (treated without sub-sampling) that are appearing in many emerging data mining applications.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have set the number of user-classes to 5 and 10 respectively for the MovieRating and MovieLens datasets as reported in previous empirical studies and Active Collaborative Filtering literature .",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 106,
+                            "text": "MovieRating and MovieLens datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the completed poses (17M skeletons) from the Kinetic dataset, our method achieved a MPJPE of 48mm on Human3.6M test data.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 66,
+                            "text": "Kinetic dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 107,
+                            "end": 126,
+                            "text": "Human3.6M test data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each data set, we initially withheld the last 2000 training examples as a validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This criteria corresponded to having a budget, B, of 200 for Nova and CiteSeer, and 400 for the Cora dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 108,
+                            "text": "Cora dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 61,
+                            "end": 65,
+                            "text": "Nova",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 70,
+                            "end": 78,
+                            "text": "CiteSeer",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This has yielded 39,160 tokens for London dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 49,
+                            "text": "London dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on KITTI  and our dataset demonstrates that our Driv-ingStereo makes stereo models more generalizable to realworld driving scenes.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 16,
+                            "text": "KITTI",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our curvature self-similarity descriptor in combination with the suggested embedded dimensionality reduction algorithm for the object detection task on the PASCAL dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 168,
+                            "end": 182,
+                            "text": "PASCAL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Crowdsourcing has become increasingly important for curating, labeling, and processing Web-scale datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 105,
+                            "text": "Web-scale datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The original Yandex document score is not provided in the log dataset; we hence use a document's rank in the Yandex returned rank list divided by 10 as its score.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 69,
+                            "text": "log dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Accordingly, we use the DAVIS 2017 training set, which contains the most examples of the fully annotated datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 47,
+                            "text": "DAVIS 2017 training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our spam filter dominated SpamBayes on the trec2005 corpus, and performed comparably to Bogofilter-one of the best spam filters to date .",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 58,
+                            "text": "trec2005 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the IMDB data set obtained from its portal 5 , we extracted actors and actresses participating in movies shown between 1990 and 2008.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 22,
+                            "text": "IMDB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", 600 query images with ground truth in the WDB database.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 56,
+                            "text": "WDB database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "grasp 7 objects in a merged reality environment to render synthetic hand pose datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The third dataset, ImageNet-20, consists of 28, 957 total images spanning 20 classes from ILSVRC2010.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 30,
+                            "text": "ImageNet-20",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 90,
+                            "end": 100,
+                            "text": "ILSVRC2010",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By their characteristics, these datasets give rise to very large numbers of patterns, and hence can only be considered by Krimp if we set minsup relatively high-implicitly limiting the detail at which Krimp can describe the data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The applicability of such a system would be, for instance, on stock photography sites, in e-shops searching in product photos, or in collections from a constrained Web image search.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(7) over large scale datasets, we linearly map the mean feature to a 128-dimentional vector before normalization as well as adopt noise constative estimation (NCE) and proximal regularization .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the reconstructions of some samples of the data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows how the nMI values improves with increasing fraction of the dataset being processed by h-vMF.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We prepared three real-world datasets for experiments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Because the test set doesn't currently include ground truth, we follow  and evaluate on the 'prototype test-set' with 733 dialogs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the special case of a Gaussian clustering of an unlabelled data set, it was demonstrated in  that gradient ascent on the likelihood of the data has an appealing interpretation in terms of backpropagation in an equivalent unit-Gaussian classifier network: for each input X presented to the network",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Speech retrieval test collections are particularly vulnerable to this effect, since changes in either the speech processing or the retrieval algorithms could result in new systems returning results from previously unassessed parts of the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Reported in , SDR+AES, taking advantages from both spaces, is the clear winner on all measures, on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also evaluate on PASCAL VOC and MS COCO datasets with only 20/80 categories to compare with the state-of-art methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 51,
+                            "text": "PASCAL VOC and MS COCO datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Right: Accuracy of Naive Bayes (NB) and SVM classifiers vs. IDC on some of the data sets described in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our dataset, transactions are recorded as a [user ID, company ID, purchased amount] triplet.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this reason, we conclude that it is potentially risky to use the TREC-TS test collections outof-the-box.",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 93,
+                            "text": "TREC-TS test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To learn our SAM-Net, large-scale semantically similar image pairs are needed, but such public datasets are limited quantitatively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "two different types of parametric transformations, Thin plate spline (TPS) and Diffeomorphic nonlinear transformation, onto the images in Pascal VOC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 138,
+                            "end": 156,
+                            "text": "Pascal VOC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Strong generalization results for the Netflix dataset are shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 53,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Through an array of experiments conducted over TREC corpora we show that our approach is more effective in detecting clusters containing a high relevant-document percentage than previous (state-of-the-art) methods .",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 59,
+                            "text": "TREC corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also use MPII dataset , the state-of-the-art benchmark for 2D human pose estimation, for pre-training the 2D pose detector and qualitatively evaluation in the experiment.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 24,
+                            "text": "MPII dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The users and their preferences are extracted from the MovieLens dataset , a dataset of 6k users who rated one million movies.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 72,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This indicates that CP-stream can learn trends from a noisy dataset with a small accuracy loss compared to the significantly more expensive batched CPD computation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Frequency (or document frequency) is hence proportional to the number of occurrences of the term in the document collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "propose a dataset that contains six categories of abnormally shaped objects, such as oddly shaped cars, airplanes, and boats, obtained from internet search engines that should be distinguished from regular samples of the same class in the PASCAL VOC dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 239,
+                            "end": 257,
+                            "text": "PASCAL VOC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Sized objects were taken from the COIL-100 dataset  with relatively uniform color distribution (objects 2, 4, 47, 78, 94, 97; all with zero degree rotation).",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 50,
+                            "text": "COIL-100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, the dataset is partitioned horizontally, and local frequent itemsets are mined separately from each partition.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our approach not only outperforms all conventional baselines as well as many supervised recurrent neural networks, but also approaches the state of the art for supervised systems on three QA datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we report results of models trained on the two Internet-scale datasets we collected with pseudo-ground :",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 75,
+                            "text": "Internet-scale datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, large unlabeled datasets are often easy to obtain, making semi-supervised learning methods appealing in various real-world applications.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two main resources available for query recommendation are the document collection (including anchor logs)  and search logs , which can also be used as forms of implicit or explicit feedback to re-rank retrieved documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the lyric features, we collected the lyrics for the music video clips in the dataset from the lyric database  .",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 112,
+                            "text": "lyric database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our results and ablation studies show that both the discriminative modeling and the differentiable relaxation are crucial for the success of D 3 TW, which achieves state-ofthe-art results in both segmentation and alignment on two challenging real-world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, we considered an artificial dataset to illustrate the effect of confounding factors on the solution quality of sparse inverse covariance estimation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Lastly, the space complexity of the algorithm depends only on the dataset and can be easily upper bounded.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the title text of the 50 topics for the Arabic and French collections, and we treated the 73 wellformed questions in NTCIR-8 as queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since  was trained on the NYUv2 dataset, it works slightly better when it predicts the images from the camera it was trained on (the point with the smallest error).",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 39,
+                            "text": "NYUv2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate all the SS prediction methods using the CB513 benchmark , which consists of 513 no-homologous proteins.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 67,
+                            "text": "CB513 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Datasets constructed thusly are often noisy, to counter this, several recent works  propose augmenting datasets via comparisons.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our dataset is about the users' music listening history, in which an item corresponds to a music track.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Performance here is worse than on previous datasets; for instance, the Fiedler vector is directly affected by topological gluing happening over long distances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To illustrate the convergence, we used a synthetic dataset with = 5, 000 in order to ensure that all four algorithms converge within 100s.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "LETOR is split into six sub-datasets: HP2003, HP2004, NP2003, NP2004, TD2003, and TD2004.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 44,
+                            "text": "HP2003",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 46,
+                            "end": 52,
+                            "text": "HP2004",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 54,
+                            "end": 60,
+                            "text": "NP2003",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 62,
+                            "end": 68,
+                            "text": "NP2004",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 70,
+                            "end": 76,
+                            "text": "TD2003",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 82,
+                            "end": 88,
+                            "text": "TD2004",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We index all 50M documents of the ClueWeb09 corpus using the Terrier IR platform , including also the anchor text of incoming hyperlinks to each document.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 50,
+                            "text": "ClueWeb09 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As for the SYSU-MM01 dataset, the gallery consists of visible images, the results go the other way round.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 28,
+                            "text": "SYSU-MM01 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We trained a small recurrent neural network having 120 hidden units for the task of character-level language modeling on Penn Treebank corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 141,
+                            "text": "Penn Treebank corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All the datasets are standardized to zeromean and normalized by the standard deviation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The last section is intended to be a bridge from the tutorial material to collection ideas that the attendees bring with them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Examples of 64 \u00d7 64 generated images from complex layouts on COCO-Stuff (top) and Visual Genome Datasets (bottom) by our proposed method and baselines.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 104,
+                            "text": "COCO-Stuff (top) and Visual Genome Datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Concretely, we minimize the distribution differences between the latent domains in the test datasets and the domains in the training datasets, using the kernel-based measure explained in section 2.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the Clueweb09 setB dataset and the topic sets released at TREC2009-2011 Web Track diversity task as our test collection .",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 33,
+                            "text": "Clueweb09 setB dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 65,
+                            "end": 88,
+                            "text": "TREC2009-2011 Web Track",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The minimum occurs every n iterations, where n is the number of subjects in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We pruned Wide-ResNet  on CIFAR-10, CIFAR-100 and TinyIm-ageNet (with a width multiplier of 12 as per ), and ResNet50  on Mio-TCD , a larger and more complex dataset devoted to traffic analysis.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 34,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 36,
+                            "end": 45,
+                            "text": "CIFAR-100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 50,
+                            "end": 63,
+                            "text": "TinyIm-ageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 130,
+                            "text": "Mio-TCD ",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This type of data is a very natural representation for the large datasets we used in our evaluations (section 5).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Such datasets are useful to illustrate the behavior of our measure for arbitrarily shaped clusters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An extension to this approach achieves significant improvements over Markov Random Field dependency models on TREC collections .",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 126,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The AS-GCN outperforms several state-of-the-art methods on two large-scale data sets",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This setting should not be confused with the Bayesian testing setup (e.g. as considered in ) where the patterns, x, are drawn at random.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, for every pair of nodes between which there is traffic according to the dataset, we place two directed edges between the nodes, one in each direction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This will enable studies about the longevity of test collection and standardization factors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the EM and STARE dataset, we follow the metrics used in  that adopts F1measure along with correctness, completeness, and quality .",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 28,
+                            "text": "EM and STARE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These datasets capture a wide array of categories (animals, indoor and outdoor scenes, household objects, etc.) and attributes (parts, affordances, habitats, shapes, materials, etc.).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the NPB and MLB data sets contain various changes, MNB performed better than MNB-W and MNB-S; that is, learning stationary words with sufficient data is more important than responding to bursty words for these data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 34,
+                            "text": "NPB and MLB data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Features in Letor OHSUMED dataset consists of 'low-level' features and 'high-level' features.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 33,
+                            "text": "Letor OHSUMED dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following other works , we divided the overall dataset into two subsets, composed respectively of 29 and  .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For UCI datasets we use the raw features, on Caltech101 we use hierarchical features (hmax), and on Pubfig83 we use the feature maps from .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 45,
+                            "end": 55,
+                            "text": "Caltech101",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 100,
+                            "end": 108,
+                            "text": "Pubfig83",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have the input document cluster as DI , and the background corpus as DB which is all the other clusters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After receiving a query from the user the TopicVis web application retrieves a set of documents from a collection of over 3.69M English Wikipedia articles.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate our algorithm, we feed tooth CBCT images in our testing dataset to the two-stage network, and the complete 3D teeth model are reconstructed using 3D Slicer",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to other dimensionality reduction techniques based on eigendecompositions , MF enforces fewer restrictions on the choice of the basis and hence ensures larger representation flexibility for complex datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In ProbE, the overall model is defined by a collection of rules of the form \"IF < rule predicate > THEN < rule model > \".",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also use the OHSUMED collection and its topics.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 34,
+                            "text": "OHSUMED collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first used the MNIST dataset to demonstrate its feasibility with the simplest situation where the light scattered off an object reflects only once before reaching the camera, as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 31,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We ran 40 trials per category and two observers per trial, for the 88 categories in common between ImageNet, SUN and Places databases.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 133,
+                            "text": "ImageNet, SUN and Places databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let us consider the Pioneer dataset which contains sequences of a robot's actions when it is moving around.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 35,
+                            "text": "Pioneer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically for a given task, the image database is obtained by querying a textbased image search engine using the keywords of the task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method on the challenging Cityscapes benchmark and achieve top results (5% improvement over Mask R-CNN) at more than 10 fps on 2MP images.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 62,
+                            "text": "Cityscapes benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In several speaker-independent experiments with the DARPA Resource Management corpus, the hybrid system showed a consistent improvement in performance over the baseline HMM system.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 84,
+                            "text": "DARPA Resource Management corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i r j using their Jaccard 5 http://www.ibm.com/dogear similarity coefficient: The second dataset was collected from the Reality Mining dataset  project.",
+                    "annotation_spans": [
+                        {
+                            "start": 120,
+                            "end": 142,
+                            "text": "Reality Mining dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We wanted the scenario to provide participants with a rationale for searching articles from this time period and to make this characteristic of the collection salient to help ground participants' relevance judgments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on two benchmark datasets demonstrate its superior performance, especially on metrics that evaluate relative importance of salient regions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The face dataset presented in  consists of 185 images of three different people, each with three different facial expressions.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hereinafter, we thus assume that the dataset D to be subdivided does not contain infrequent items, while the frequent items have been re-mapped to [0, M ), where 0 (M \u2212 1)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 5 describes the extended TDT4 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 44,
+                            "text": "TDT4 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "collect a dataset of drone crashes and train self-supervised agents to avoid obstacles.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments were run with the same initial model for data sets of various sizes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, we closely look at the results for the \"Enron\" dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 60,
+                            "text": "\"Enron\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Yelp business data set is a publicly available benchmark data set shared on Kaggle 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 26,
+                            "text": "Yelp business data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Under the assumption that the person images (both in source and target datasets) share the same set of semantic attributes, the distribution distance of the attribute feature space between the source set and the target set ought to be minimize.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we evaluate our learning framework for non-parametric registration approaches on the OAI dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 105,
+                            "text": "OAI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "proposed a similarity-based time-profiled association mining in a time-stamped transaction dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each collection, we evaluated the three CLIR techniques (P r token , P r SCFG/PBMT , and P r nbest , with n \u2208 {1, 10}), exploring the effect of the different alignment heuristics as well as flat vs. hierarchical phrase-based translation models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, our approach outperforms state-of-the-art approaches on the standard VQA and Visual Question Generation tasks on the challenging VQA v2.0 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 142,
+                            "end": 158,
+                            "text": "VQA v2.0 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Previous inference algorithms for these models are mostly based on Gibbs sampling, which can be very slow, particularly for large-scale data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These kinds of ensembles are called jokingly Frankenstein ensembles and proved their benefit on versatile data sets in many machine learning challenges.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, we were not able to evaluate our methods based on public datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Global Scene Context: In a big data settings with hundred thousands and million examples, doing nearest neighbors could be a time-consuming process.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On three datasets, we demonstrate the benefit for visual category learning with zero or few training examples, a critical domain for rare categories or categories defined on the fly.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With Isolation Forest, AUC value on Covtype dataset is 0.88, which seems to be a good value.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 51,
+                            "text": "Covtype dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One the other hand, all the three models yield quite stable results on RCV1, which suggests that a largescale dataset may help alleviate the shortcomings of the basic term weighting schemes.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 75,
+                            "text": "RCV1",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive analysis and experiments on LFW, LFW BLUFR and MegaFace show that our method performs better than state-of-the-art methods using the same network architecture and training dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 41,
+                            "text": "LFW",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 43,
+                            "end": 52,
+                            "text": "LFW BLUFR",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 57,
+                            "end": 65,
+                            "text": "MegaFace",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our first dataset is drawn from the NIST Topic Detection and Tracking (TDT2) corpus which consists of news stories in the first half of 1998.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 83,
+                            "text": "Topic Detection and Tracking (TDT2) corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second dataset is the Yale Face Database B from .",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 46,
+                            "text": "Yale Face Database B",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We calibrate the proposed user model on the basis of the click log dataset provided by Yandex  (h p://imat-relpred.yandex.ru/en/).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Qualitative results from the DAVIS2017 test-dev and DAVIS2016 validation sets, where the images are sampled at the average intervals for each video.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 77,
+                            "text": "DAVIS2017 test-dev and DAVIS2016 validation sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In (a) we plot the divergence estimate between the three's and two's handwritten digits in the MNIST dataset (http://yann.lecun.com/exdb/mnist/) in a 784 dimensional space.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 108,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the NPB and TV data sets, we selected noun, verb, and adjective words by using MeCab 0.98 with IPA dictionary 1 for Japanese word segmentation and part of speech labeling.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 28,
+                            "text": "NPB and TV data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Concretely, given an image sample , the discriminator network is trained to minimize the adversarial loss, i.e., maximizing the log-likelihood of assigning correct source to this sample: where and denote the collections of real images in training and synthetic images produced by , respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The current renaissance of 3D deep learning methods can be attributed to both the wide availability of cheap sensors for collecting 3D data and the release of large standard datasets of 3D objects .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Financial dataset downloaded from Yahoo Finance 2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 17,
+                            "text": "Financial dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the second dataset, ignoring the burst information renders all of events to be dependent, and it is expected, since all the events are inherently dependent because they follow the same a priori distribution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the AP marginal mean of systems on the whole TIPSTER collection (black dashed line) and on the sub-corpora (red solid line) together with their con dence intervals (shaded).",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 63,
+                            "text": "TIPSTER collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the ability to generalize to new concept compositions on the synthetic datasets, we train the models on CLEVR-CoGenT-A's train split and evaluate on the validation set without fine-tuning.",
+                    "annotation_spans": [
+                        {
+                            "start": 116,
+                            "end": 144,
+                            "text": "CLEVR-CoGenT-A's train split",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, we apply causal discovery algorithms to the simulated datasets with different practical issues:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this way, we can largely alleviate the data sparsity problem when applying topic models on large scale short text collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using a recently released public enterprise email collection, we formalize the task of prediction whether an email will be responded to and how long it will take to do so.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The precision and success plots of our UDT tracker with different configurations on the OTB-2015 dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 104,
+                            "text": "OTB-2015 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For brevity we put the results from GQA dataset in the Appendix C. To evaluate the efficacy of DAFT, we conducted experiments on two different criteria: performance (accuracy and run-time) and interpretability.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 47,
+                            "text": "GQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The utility values of effective features may differ substantially in various data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In  we report the most probable repeats for some topics, inferred on the AP dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 83,
+                            "text": "AP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However,  demonstrates that most multi-modal translation algorithms are not significantly better than an off-the-shelf text-only machine translation (MT) model for the Multi30K dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 168,
+                            "end": 184,
+                            "text": "Multi30K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this matrix, objects are the face images of the ORL collection while features are the pixel values of the images.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 65,
+                            "text": "ORL collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is the Billion Triple Challenge 2009 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 59,
+                            "text": "Billion Triple Challenge 2009 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Gmail, whereas we focus on the enterprise email collection, Avocado.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To this end, we utilize the Sitcom dataset  which contains pose samples captured from sitcom videos and train a human pose prediction model.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 42,
+                            "text": "Sitcom dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose the first online MF method accounting for convexity constraints on multi-cluster data sets, termed online convex Matrix Factorization (online cvxMF).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the case of every dataset for all measurements, except for the number of cluster blocks K \u00d7 L for the \"MovieLens\" dataset, the MLIRM significantly outperformed the other models.",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 124,
+                            "text": "\"MovieLens\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In an attempt to fill this gap, we present the first publicly available dataset that can be used for analyzing the evolution of opinions expressed towards different entities and more importantly for sentiment spikes' trigger detection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared with MD (Enhanced)  that uses the pixel-wise and local pair-wise distillation schemes over MobileNet, our approach with the similar network MobileNetV2Plus achieves higher segmentation quality (74.5 vs 71.9 on the validation set) with a little higher computation complexity and much smaller model size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data set yields 54 families containing at least 10 family members (positive test) and 5 superfamily members outside of the family (positive train) for a total of 54 One-Vs-All problems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We see that for 16 datasets Slim reaches this point faster than Krimp-in fact, several orders-of-magnitude faster for many of these, particularly for dense datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "their implementation is publicly available,  and because they are the only methods that were tested using both the FIRE collection  and Locke et",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 130,
+                            "text": "FIRE collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A segmentation model  computes segmentation masks for each realistic image in the dataset, then RGB patches belonging to the same semantic class are placed in the same memory bank.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Fingerprints had length 2048, and were calculated on pairs of molecules from the solubility dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 99,
+                            "text": "solubility dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On Shuttle dataset and Ann-thyroid dataset, the training sets are about five times of the labeling sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 42,
+                            "text": "Shuttle dataset and Ann-thyroid dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the EPINIONS dataset, an edge between two users u and v, denoted as \u27e8u, v\u27e9, represents that user",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 23,
+                            "text": "EPINIONS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Before training the agent, the two CNNs: ResNet and VGG, were pre-trained on the CIFAR10 and Fashion MNIST datasets, and after pre-training the parameters are fixed and not updated.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 115,
+                            "text": "CIFAR10 and Fashion MNIST datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We collected two activity datasets using wearable sensors, which include audio, acceleration, light, temperature, pressure, and humidity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The quadratic nature of this computation makes the use of the algorithm prohibitive for all but the smallest datasets, especially when the samples are high-dimensional.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments on large versioned data sets from Wikipedia and the Internet Archive show significant reductions in index size over  and  with very fast access speeds.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, despite being the best performing unsupervised method on DAVIS2016, the performance of ARP  drops significantly in the FBMS59  and Seg-TrackV2  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 79,
+                            "text": "DAVIS2016",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 132,
+                            "end": 165,
+                            "text": "FBMS59  and Seg-TrackV2  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our dataset contains 2,193 images and 144,512 head counts in total.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By covering the training data set well, we hope that when the resulting decision tree is applied for classification of a unseen sequence, the sequence may likely have at least one feature from A.  shows a GSDT example.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have collected the Computer Audition Lab 500-song (CAL500) data set by having humans listen to and annotate songs using a survey designed to capture 'semantic associations' between music and words.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 70,
+                            "text": "Computer Audition Lab 500-song (CAL500) data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, the run-time training complexity is O(N 2 ), which makes this approach computationally challenging for most modern datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our use, S denotes a dataset drawn randomly from the distribution D and Y denotes the output of a (possibly randomized) algorithm on S. We prove that approximate max-information has the following properties \u2022",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, when we merge n consecutive projected datasets in the partition D  , the number of items that can appear in the transactions of D [k,k+n) are:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As usual, participants will run their systems on the test collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "During training we draw random 64 \u00d7 64 pixel patches from this augmented training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Faces from the LFW dataset have been anonymized, in vizualisations only, to protect personality rights.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 26,
+                            "text": "LFW dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the corresponding recall plot with increasing value of k for the Genetics data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 88,
+                            "text": "Genetics data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the following three methods using Douban datasets: 1.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 60,
+                            "text": "Douban datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Additionally, we apply N2V training and prediction to three biomedical datasets: cryo-TEM images from , and two datasets from the Cell Tracking Challenge 2 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i are the experimentally measured and the predicted modifications of synaptic strength in the ith experiment; N is the number of data points (N = 18 for the visual cortex data set, N = 11 for the hippocampal data set).",
+                    "annotation_spans": [
+                        {
+                            "start": 196,
+                            "end": 216,
+                            "text": "hippocampal data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 157,
+                            "end": 179,
+                            "text": "visual cortex data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Training on Wikipedia results in lower scores on CONLL testing datasets compared to models trained on CONLL directly (See ) due to two main factors.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 71,
+                            "text": "CONLL testing datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 102,
+                            "end": 107,
+                            "text": "CONLL",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to the results reported in , LL+Exp and LL+Gaus perform comparably in the newswire collections, but LL+Exp outperforms LL+Gaus",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Without bells and whistles, a single HTC obtains 38.4% and 1.5% improvement over a strong Cascade Mask R-CNN baseline on MSCOCO dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 135,
+                            "text": "MSCOCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The OAI dataset consists of 176 manually labeled MRI from 88 patients (2 longitudinal scans per patient) and 22,950 unlabeled MR images from 2,444 patients.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "OAI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Analyzing , we see the performance of MC-LSH algorithm for 0%, 5% and 10% mismatch factor across the DS1 and DS2 datasets, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 121,
+                            "text": "DS1 and DS2 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparing both solutions in terms of training time on our datasets, we also find that the most efficient technique depends on the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "That is, we train a single-view depth network from scratch using each dataset and measure the performance on DIW.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Assuming that w and b are chosen by an optimal learning algorithm on a sufficiently large training dataset, and that the test set follows the same distribution with parameter \u03b8",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The performance benefits of the resulting traversal are unprecedented, due to a cache-aware approach, both in terms of data layout and access patterns, and to a program control flow that entails very low branch mis-prediction rates; \u2022 an extensive experimental assessment conducted on publicly available LtR datasets with various \u03bb-MART models, differing for both the size of the ensemble and the number of tree leaves.",
+                    "annotation_spans": [
+                        {
+                            "start": 304,
+                            "end": 316,
+                            "text": "LtR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The effectiveness of the proposed approaches has been demonstrated by an extensive set of experiments on the federated search testbed of the Clueweb dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 141,
+                            "end": 156,
+                            "text": "Clueweb dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method on three datasets, i.e., BSDS500, NYUDv2, and Multicue, and achieve ODS Fmeasure of 0.828, 1.3% higher than current state-of-the art on BSDS500.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 55,
+                            "text": "BSDS500",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 57,
+                            "end": 63,
+                            "text": "NYUDv2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 69,
+                            "end": 77,
+                            "text": "Multicue",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 159,
+                            "end": 166,
+                            "text": "BSDS500",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested these implementations on data sets from UCI machine learning archive .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One critical difference between attribute-value data and features in sequences is that, while an attribute is defined in every record of an attribute-value data set such as a relational table, a feature may appear in only a relatively small number of sequences.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We chose this approach since there exists exactly one date of granularity day per event in the evaluation data set, and the coverage of dates in Wikipedia is very good for the considered time frame , which reduces the chance of evaluating the completeness of Wikipedia or the NER tool.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Through the comparative experiment on the three datasets, it is easy to know that CUHK03 dataset with the new protocol would be the most challenging one because all methods make worse results on it.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 96,
+                            "text": "CUHK03 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the VOT-2016 dataset, the proposed tracker obtains the best accuracy score (0.55) and the secondbest EAO score (0.299).",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "VOT-2016 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Chess (kr-k) and Plants datasets were obtained from the UCI repository.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 36,
+                            "text": "Chess (kr-k) and Plants datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 60,
+                            "end": 74,
+                            "text": "UCI repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our proposed method using real datasets, and the experimental results demonstrate the effectiveness of our approach.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Excellent results on the rotated MNIST dataset and the ISBI 2012 2D EM segmentation challenge are reported.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 46,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate that our model learns accurate correspondence by evaluating it on the J-HMDB dataset , which requires precise matching of points compared to the coarser propagation of masks.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 99,
+                            "text": "J-HMDB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In each case, we randomly split each dataset into training and test sets using a 70:30 split, and learned all four models on the training set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Whereas, for the Enron corpus, we calculate the perplexity for the 1000 held-out documents that are sampled from the test sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 29,
+                            "text": "Enron corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our extensive experiments on two large publicly available datasets, i.e. the KITTI  and the Cityscapes  datasets, demonstrate the effectiveness of the proposed framework.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 112,
+                            "text": "KITTI  and the Cityscapes  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Intuitively, every document in the text corpus is represented through \u03b8 d as a point in the topic simplex {\u03b8 | k \u03b8 k = 1}, and we hope that the linear transformation {T y } will be able to reposition these points such that documents with the same class labels are represented by points nearby to each other.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The artificial data set was sequence of two patterns, which we explain below, and included an anomaly where the occurrence frequency of each pattern changed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Among the three Chinese datasets, the proposed methods achieve the highest relative improvement in SogouT dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 113,
+                            "text": "SogouT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For VGG net, the 16-layer (13-Conv + 3FC) model is adopted to perform on CIFAR-10 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 89,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "See Appendix F for an experiment on a harder version of this dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, one new dataset is created from the Wikipedia collection of ClueWeb data.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 87,
+                            "text": "Wikipedia collection of ClueWeb data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This leads to the general and simple retrieval function: where t d w is the normalized form of x d w and \u03bbw is a parameter for the probability distribution of w in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In total, our train set contains 30,229 images and our test set contains 9,495 images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the ORL database with 12x12-pixel images, our circuit achieves up to 85% classification performance (98% of an equivalent software implementation).",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 22,
+                            "text": "ORL database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One is the TREC dataset based on the publicly available TREC queries (2008 Million Query Track).",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 23,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 56,
+                            "end": 68,
+                            "text": "TREC queries",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In smaller TREC collections, we have a sufficiently complete set of judgments that can be thought of as \"truth\", but no such set exists for the terabyte collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 27,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The FMD dataset is split to training and test sets, where the test set is composed of images randomly selected from the 19-th FOV of each imaging configuration and noise levels (the rest 19 FOVs are for training and validation purposes).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "FMD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We ran our reconstructive hashing algorithm on the Gist descriptors for the Tiny Image data set using 50 bits, with 1000 training images used to construct the hash functions as before.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 95,
+                            "text": "Tiny Image data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on a broad range of datasets validate the effectiveness of our approach against competitive algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We involved the same testing methodology as in : use MSN search engine as the source of frequency data and a random sample of 1400 sentences drawn from publicly available Enron data set of email messages.",
+                    "annotation_spans": [
+                        {
+                            "start": 171,
+                            "end": 185,
+                            "text": "Enron data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For evaluating our approach, we use 61 sequences of the training set in the KITTI object detection dataset , containing a total of 20k frames.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 106,
+                            "text": "KITTI object detection dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Illustrations of Algorithm 1 on three benchmark datasets (D2, D3, D4).",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 60,
+                            "text": "D2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 62,
+                            "end": 64,
+                            "text": "D3",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 66,
+                            "end": 68,
+                            "text": "D4",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A/B tests Similarly to the interleaving dataset",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As we have observed when preprocessing the two datasets, different types of entities are associated with different number of observations, and therefore they consist of different degrees of intrinsic complexity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the WIDER dataset, we remove anchors for different aspect ratios",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "WIDER dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Several other datasets from genomics  and health-care  contain causal relations among multiple variables and are commonly used for the evaluation; however, few pairs of ground-true causal relations are known/labeled by domain experts and the evaluation is not systematic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first sample two collections of words (denoted as D S and D T ) from the source and target languages, respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, comparing with DRCF, the recent DNN framework that exploits RNN models to capture the users' dynamic preferences, our proposed CARA architecture significantly outperforms DRCF by 6.53%, 5% and 13.22% in terms of NDCG for Brightkite, Foursquare and Yelp datasets, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 234,
+                            "end": 274,
+                            "text": "Brightkite, Foursquare and Yelp datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on ModelNet40, ShapeNet, and ScanNet show that deep convolutional neural networks built on PointConv are able to achieve state-of-the-art on challenging semantic segmentation benchmarks on 3D point clouds.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 25,
+                            "text": "ModelNet40",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 27,
+                            "end": 35,
+                            "text": "ShapeNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 41,
+                            "end": 48,
+                            "text": "ScanNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It achieves the state-of-the-art performance on many datasets",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This measure is suitable in particular for outlier rankings and outlier scores (posing other requirements as compared to the evaluation of rankings in IR and databases in general).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use two different datasets: tweets and the Enron email corpus .",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 64,
+                            "text": "tweets and the Enron email corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "remove watermarks using large image collections, which contain the same watermark, as well as some minimal user guidance about the watermark location.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset is a recent largescale dataset with 1,400 sequences and more than 3.5M frames in total (the average frame length is more than 2,500 frames).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The same pairwise dependency analysis techniques were applied on these synthetic datasets to test the performance and stability of the proposed dependency analysis methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, most retrieval systems on botanical text collections are keyword-based and do not work effectively with such specific retrieval.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our BiC method achieves remarkably good performance, especially on large scale datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Active learning based sampling methods have demonstrated their outstanding performance in different datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Despite of rapid developments in plant molecular biology and molecular-based breeding techniques, the understanding of these complex interactions is still limited, and plant phenotyping has been identified as the main \"bottleneck in basic plant science and plant breeding\" : Phenotyping by means of sensor tech-nologies involves the identification of relevant patterns in huge data sets of high-dimensional data with a demanding signal-to-noise ratio.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then compare SogouT-16 with several similar datasets in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the necessity of all components of our model by comparing the inception score, object classification accuracy, and diversity score of several ablated versions of our model trained on Visual Genome dataset: \u2022 w/o L img 1 reconstructs ground truth images without pixel regression.",
+                    "annotation_spans": [
+                        {
+                            "start": 198,
+                            "end": 219,
+                            "text": "Visual Genome dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use two email corpora: the first contains 65,000 publicly available emails received between 2000 and 2002 from the Enron corpus, the SpamAssassin corpus, Bruce Guenter's spam trap, and several mailing lists.",
+                    "annotation_spans": [
+                        {
+                            "start": 118,
+                            "end": 130,
+                            "text": "Enron corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 136,
+                            "end": 155,
+                            "text": "SpamAssassin corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data sets used in our experiments include six UCI data sets 1 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "only build their own test collections, but to support their use by measuring the properties of the test collection and including those gures in their work.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method achieves a strong 35 value of METEOR, which provides a 35.0\u221234.1  34.1 Results on MSR-VTT dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 108,
+                            "text": "MSR-VTT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To examine the quality of our simulated data, we mix 50 simulated records with 50 records sampled from the real-world dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data set contains 53 E. Coli promoters instances and 53 non-promoter instances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The sparsity of user group \"20-50\" is 99.87%, and it is the sparsest dataset among three datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared with the state of the art ( ), our BiC method is more effective on large datasets with 1000+ classes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, we complement one of the largest and most challenging static computer vision datasets, VOC 2012 Actions, with human eye movement recordings collected under the primary task constraint of action recognition, as well as, separately, for context recognition, in order to analyze the impact of different tasks.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 110,
+                            "text": "VOC 2012 Actions",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To discover this kind of anomalous collections, we introduce a novel definition of anomaly, called Extreme Rank Anomalous Collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 132,
+                            "text": "Extreme Rank Anomalous Collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the training data is perfectly ordered, then g is isotonic and we are done.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the first experiment we evaluate motion disambiguation on the first 2400 frames of a sequence from the Oxford Robot car dataset  and a sequence in the TUM RGBD",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 130,
+                            "text": "Oxford Robot car dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we find that the recall rates of PC on both datasets are similar and the precision rate of PC on the simulated dataset is higher than the precision rate on the real dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On HTTP, Covtype, Mammography and Ann-thyroid datasets, the improvements are significant.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 54,
+                            "text": "HTTP, Covtype, Mammography and Ann-thyroid datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\u03b8 D w N } over the document collection vocabulary V =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the trainable VLAD models, we set the cluster center number K as 64 for MSVD dataset and 128 for MSR-VTT dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 88,
+                            "text": "MSVD dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 101,
+                            "end": 116,
+                            "text": "MSR-VTT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Bayes error between the three classes (Setosa, Versicolor, and Virginica) in the Iris data set  and used bootstrapping to calculate confidence intervals.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 94,
+                            "text": "Iris data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Further, all of the above mentioned research studies use fixed Twitter datasets collected at a certain point in time.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 79,
+                            "text": "Twitter datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The intended audience is advanced students who nd themselves in need of a test collection, or actually in the process of building a test collection, to support their own research.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In non-parametric classification, the probability of predicting image X as the i-th instance in the dataset is: wheref is the L2-normalized version off and \u03c4 is the temperature parameter.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To this end, we deployed the networks which were trained on the baseline-ImageNet dataset and fed them with 5000 images from the test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 89,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experimented on an ECG dataset because there are simple and obvious rules in such data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our collection spans from the 10th of April to the 31st of December 2015.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the performance of 3DSmoothNet for correspondence search on the 3DMatch data set  and compare against the state-of-the-art.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 92,
+                            "text": "3DMatch data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Na\u00efvely, many pairs of background images and lighting (HDR panoramas) would be required for training; however, capturing HDR panoramas is complex and timeconsuming, so no such dataset exists for both scene types.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Network Intrusion Dataset is a rapidly evolving dataset for which there is usually one dominant class in the stream over time (either the normal type connections or some attack type).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 29,
+                            "text": "Network Intrusion Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Wiki dataset the textual descriptions are based on Wikipedia surrounding paragraphs which yield a 5,000 dimension text space and in average there are 117.5 surrounding words per image.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 15,
+                            "text": "Wiki dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For Human3.6M dataset, we follow the standard protocol, i.e.,Protocol#1, to use all 4 camera views in subjects 1, 5, 6, 7 and 8 for training, and same all 4 camera views in 9 and 11 for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "Human3.6M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, we have applied a special case of this scalable framework to learn kernels over data sets containing nearly half a million images, as well as the MNIST data set of 60,000 data points .",
+                    "annotation_spans": [
+                        {
+                            "start": 159,
+                            "end": 173,
+                            "text": "MNIST data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Prediction Models on the TREC Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 37,
+                            "text": "TREC Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They showed consistent and significant improvements over document retrieval baseline on the longer FR collections, but the results were either negative or mixed on the shorter TREC24, TREC-45 and WSJ collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 113,
+                            "text": "FR collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 176,
+                            "end": 211,
+                            "text": "TREC24, TREC-45 and WSJ collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare our method with recent ZSL methods on two fine-grained image classification datasets, namely Caltech-UCSD-Birds  (CUB) and SUN dataset  and two coarse grained datasets (Awa1 and Awa2 ) following the Generalized ZSL (GZSL) protocol of .",
+                    "annotation_spans": [
+                        {
+                            "start": 180,
+                            "end": 193,
+                            "text": "Awa1 and Awa2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 104,
+                            "end": 145,
+                            "text": "Caltech-UCSD-Birds  (CUB) and SUN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, if the target dataset is large or the number of parameters is small, so that overfitting is not a problem, then the base features can be fine-tuned to the new task to improve performance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For experiments on new user prediction, we split our dataset into non overlapping sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To understand the effects brought by the parameters, we compare the spammer detection performance of the proposed CSD on the Twitter datasets with different parameter settings.",
+                    "annotation_spans": [
+                        {
+                            "start": 125,
+                            "end": 141,
+                            "text": "Twitter datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A small validation set ( 1 10 of exemplars) is good enough to estimate the bias parameters (\u03b1 and \u03b2 in Eq. 4).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To speed up learning, we subdivided the Netflix dataset into minibatches, each containing 100,000 user/movie/rating triplets.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 55,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also evaluated the performance of SimFusion+ on D-BLP and WEBKB datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 75,
+                            "text": "D-BLP and WEBKB datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the organizers' view, statistically-motivated methods can be established for designing and maintaining test collections and for quantifying progress.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show results for the following experiments: 1) We train and evaluate our model on the synthetic FlyingTh-ings3D dataset, and 2) also test it directly on the real-world KITTI Scene Flow dataset without fine-tuning.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 122,
+                            "text": "FlyingTh-ings3D dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 171,
+                            "end": 195,
+                            "text": "KITTI Scene Flow dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments in binary classification focused on these data sets because in previously reported benchmarks, they exhibited the biggest performance gap between deep architectures (e.g., deep belief nets) and traditional SVMs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A na\u00efve greedy implementation has O(N 2 d 2 K) complexity, where N is the dataset size, d is the feature space dimension, and K is the number of generated comparisons.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A large amount of research in operating systems and networking has focused on how to detect and eliminate redundancies in large data collections; see, e.g., .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Through extensive experiments on various architectures and datasets, we found networks are sensitive to the directions of Fourier basis functions of some specific frequencies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we use 20% images of each scene in this dataset as training set and use the remaining as test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first dataset was the \"Animal\" dataset, which includes relationships between 50 mammals and 85 features.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 42,
+                            "text": "\"Animal\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the Digg.com data set, our approach yields a utility score of 0.393, much better than a tuned svm rank  (0.365).",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 24,
+                            "text": "Digg.com data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method extensively against multiple widely accepted benchmark datasets of 3DMatchbenchmark  and Redwood , on the important tasks of feature matching and geometric registration.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Assuming that \u2303 is invertible, the matrix \u2303 1 , also known as the precision matrix, is often used to establish a degree of confidence we have in the data collection .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Critical difference plots derived from the results in  separated into (left) 17 UCR data sets and (right) 9 data sets contributed by the authors.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 95,
+                            "text": "UCR data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our demonstration, we aim at the task of retrieving images that are visually and semantically similar to a given example image; the system should be able to online evaluate k nearest neighbor queries within a collection containing tens of millions of images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "sen from the COCO training set, while validation and test images are chosen from the COCO validation set.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 30,
+                            "text": "COCO training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 85,
+                            "end": 104,
+                            "text": "COCO validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the performance of FCI and RFCI on the dataset containing unknown confounders with that without confounders.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To the best of our knowledge, our dataset is the first public dataset containing both transaction meta-data and demographic information.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we perform an experimental evaluation of our approach based on large document collections from Wikipedia and the Internet Archive.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset , and evaluated each model on Middlebury 2006 (MID 2006) , Middlebury 2014 (MID 2014) , and KITTI 2015 dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 118,
+                            "text": "Middlebury 2006 (MID 2006) , Middlebury 2014 (MID 2014) , and KITTI 2015 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each experiment, comparisons are based on the same validation procedure used in the appropriate original papers that introduced the datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When dealing with today's large-scale data sets, many data mining practitioners therefore often abandon deterministic approaches and resort to randomized approaches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the MLB data set, we removed stop words and treated punctuation and other non-alphabetic characters as separate tokens.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 20,
+                            "text": "MLB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We construct our PIPA-relation graph dataset using two attributes (age and gender) from the attribute annotations published on the PIPA dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 131,
+                            "end": 143,
+                            "text": "PIPA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Now that the dimension of the problem has been brought down to 1, observations can be ranked and one may perform for instance a basic two-sample Wilcoxon test based on the data sets s(X",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have illustrated the results for the two synthetic data sets corresponding to Syn100.D500K.P(u) and Syn200.D500K.P(u).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, in our experiment we used 200 topics which is almost negligible in compared to the tens of thousands of word features a text collection typically has.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The significant performance improvement of deep neural networks in recent years is partly due to the wide availability of large labeled datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the effects of the number of instances per bag and the number of relevant features on the performance of EM-DD algorithm are also evaluated using artificial data sets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our approach to combining the estimates of each constituent SR measure was to use a regression model to predict the human gold standard judgments in WordSim353, the most common SR dataset in the literature.",
+                    "annotation_spans": [
+                        {
+                            "start": 177,
+                            "end": 187,
+                            "text": "SR dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 149,
+                            "end": 159,
+                            "text": "WordSim353",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compiled two dataset-specific query workloads by extracting frequent queries from the AOL query logs, which were temporarily made available during 2006.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Close to this idea, TVQA  presented an alternative dataset for the task of understanding movies and plots.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The advantage of HF optimization is not prominent in the result of the experiments using the TIMIT database.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 107,
+                            "text": "TIMIT database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The arguments just presented do not depend on the distribution of the permutation values, only on the hypothesis that all permutations are equivalent (specifically, that the randomized datasets are exchangeable ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We extensively test GPFM on 5 benchmark datasets and compare it to two state-of-the-art context-aware collaborative filtering methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We asked five different workers on Amazon Mechanical Turk to answer 200 dataset questions from the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, we find that there are > 210K POS tag sequences in our training dataset D of |D| > 500K captions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We create a new data set for evaluating the new task and conduct experiments to show that the proposed SpecLDA outperforms a related state-of-theart model for extracting relevant review sentences.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Some further improvement (up to 12% in precision) can also be achieved with the RankSVM and GP based methods, although the best learning-to-rank strategy depends on the specific dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These works leverage relatively large scale datasets collected on participants' laptops and mobile devices in more general settings during daily life.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Obtaining corresponded training outlines, however, requires painstaking supervision and we would like to be able to use readily available simple outlines such as those in the LabelMe dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 175,
+                            "end": 190,
+                            "text": "LabelMe dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They were shown to work well for smaller data sets where they often outperform randomized selection strategies .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, the vocabulary of the PubMed datasets is significantly larger due to the medical terms that appear.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 43,
+                            "text": " PubMed datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Object Localization: We also tested DCL on weakly supervised object localization task on VOC2007 dataset using SPN .",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 104,
+                            "text": "VOC2007 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For experimental evaluation of our models, we use the recently published Stanford Sentiment Treebank (SST) , which includes labels for 215,154 phrases in the parse trees of 11,855 sentences, with an average sentence length of 19.1 tokens.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 100,
+                            "text": "Stanford Sentiment Treebank",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to evaluate and compare the proposed approaches, we adopt standard collections for the AOR task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Smoothing on document model is recognized to be crucial , and one of common smoothing methods is the Jelinek-Mercer interpolation smoothing: where \u03bb is an interpolation parameter and \u03b8 C the collection model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluating the performance of DeepCaps on several benchmark datasets: We significantly outperform the existing state-of-the-art capsule network architectures, while requiring a significantly lower number of parameters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, we observe that when using large unsupervised datasets, UAT++ performs nearly as well as the supervised oracle.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consider the computation of precision on CovType data set for rules with \u03f5 + R (S) =",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 57,
+                            "text": "CovType data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar discrepancy exists in the two other Airbnb datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To explore the capability of our method, we conduct intensive experiments on the datasets depicted in Section 3.3, and also use another open-source dataset with three real sequences (Face, jumping, and ball)  for comparison.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The significant improvements on this dataset validate the effectiveness of TEDnet.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast, 60% of LGL's toponyms are small cities, counties, and states, and among all three datasets, LGL contains the smallest fraction of countries and large cities, showing that LGL mainly concerns smaller, more local places, with a correspondingly smaller geographic audience.",
+                    "annotation_spans": [
+                        {
+                            "start": 184,
+                            "end": 187,
+                            "text": "LGL",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 108,
+                            "text": "LGL",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A document collection, called KBA Stream Corpus 2012 1 , has been developed specifically for this track.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We are working with challenge organizers from the COCO dataset committee and hope to run the first LVIS challenge at the 2019 COCO workshop, likely at ICCV.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 62,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To maintain the resolution of all datasets consistent, the sizes of cube map and ERP representations were adjusted based on the number of pixels on the equator of the SpherPHD image.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The online component is necessary for categorizing documents into topic-based clusters in real-time, whereas the intermittent batch processing is required for improved unsupervised mining on larger offline text collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "E2LSH is for Euclidean space and thus we used it to query the SIFT image descriptor dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 91,
+                            "text": "SIFT image descriptor dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Towards the goal of constructing a large-scale benchmark with high diversity, we proposed a hierarchical structure to organize our dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(4) Compared with GAT (Chebyshev filters with self-attention), DF-ATT also improves the results and achieves 0.4%, 0.6% and 3.3% higher accuracy on the datasets Cora, Citeseer and Pubmed, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 167,
+                            "end": 175,
+                            "text": "Citeseer",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 180,
+                            "end": 186,
+                            "text": "Pubmed",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 161,
+                            "end": 165,
+                            "text": "Cora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on two real-world multi-label datasets show that our Mpu model consistently outperform other commonly-used baselines.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the dataset includes language models built from the body, title, and the anchor text of the web documents indexed by Bing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Label data sources: High-resolution (1m) land cover labels from the Chesapeake Conservancy , based on imagery from the years 2013-2014, and low-resolution (30m) land cover labels from the 2011 National Land Cover Database (NLCD) .",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 90,
+                            "text": "Chesapeake Conservancy",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 188,
+                            "end": 221,
+                            "text": "2011 National Land Cover Database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The recipes data set consists of ingredients from recipes taken from the computer cooking contest 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "recipes data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the vocabulary size is much smaller and more stable than collection size in short texts, this correlation measure does not suffer from the sparsity problem.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TensorX describes the observed affinity among users, items and features in a training data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments demonstrate that our model improves upon the state-of-the-art performance on KITTI depth completion benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 131,
+                            "text": "KITTI depth completion benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The remaining part of the Wikipedia dataset was used to generate target texts for classifier evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 43,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we apply our drought detection algorithm to the entire CRU dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 75,
+                            "text": "CRU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the experiments in this paper, the tree was cut at level 1 to create four smaller trees, each of which corresponds to one smaller data set: Reuters-E Reuters-C, Reuters-M and Reuters-G.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We call the method pseudo label-guided sampling, and verified its effectiveness on OID and artificially created sparsely annotated data based on MS COCO .",
+                    "annotation_spans": [
+                        {
+                            "start": 145,
+                            "end": 152,
+                            "text": "MS COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the MCI-algorithm on a variety of synthetic data sets to verify its validity and assess its behaviour and performance in uncovering causal information from multiple models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The evaluations of natural law based clustering algorithms on Mix data set are presented in .",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 74,
+                            "text": "Mix data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in the EN-to-NL direction, many irrelevant and general terms from EN queries are found in the NL target collection (e.g., words like words, document, telling) which lead LM-UNI and the combined model astray; (2) Since WE-VS is a vector space model, while LM-UNI is a probabilistic language model, their combination is executed posthoc, after obtaining the rankings separately and then normalizing the scores (see eq.",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 123,
+                            "text": "NL target collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, the first block of methods are optimization-based, the second are base-class-corpus algorithms, and the third are metric-based approaches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose novel learning methods to automatically reshape datasets into domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the INEX collection (where relevance assessments apply to document sections)",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 23,
+                            "text": "INEX collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For such collections, keywords or phrases are not very effective in identifying particular documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The third part reports the results of our method and the fourth part compares the performance using multiple POV videos on PEV dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 134,
+                            "text": "PEV dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and for the dynamically binarized dataset we sampled images from Bernoulli distributions with probabilities equal to the continuous values of the images in the original MNIST dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 169,
+                            "end": 182,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In both datasets, BUCM is overcome by all other co-clustering methods: this proves that a hierarchical structure provides substantial information for boosting the accuracy of prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The fourth dataset is sushi  which contains 50,000 ratings of 100 different types of sushi by 5000 Japanese users.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One of these approaches is to implicitly supervise the naturalness through the refined dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to better understand the role of regularization terms, we also conduct experiments on the Python dataset to observe changes in regularization terms.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 113,
+                            "text": "Python dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In many datasets, the variances of the clusters are inhomogeneous, where both sparse and dense regions exist in the same dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the contrary, our method has been shown to be useful and can achieve a speed up of 2 over TREC collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 109,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The corresponding results by training and testing in the ICVL dataset are shown in the fifth row of .",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 69,
+                            "text": "ICVL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For evaluating performance, to account for stochasticity in the training procedure, we do 5 rounds of training and evaluate each model on the labeled images from the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If so, how many clusters k are present in the dataset?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There has been an increasing move to automatically extract such representations for evaluating expertise from heterogeneous document collections .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To tackle these problems, we introduce a new dataset called \"COIN\" for COmprehensive INstructional video analysis.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The UCF-QNRF is a new dataset with one of the highest number of high-count crowd images and annotations.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 12,
+                            "text": "UCF-QNRF",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In doing so, we select the parameter \u03bb in (3.5) as the value that maximizes AUC on the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": we learn the ranking function and select parameter values in the validation set",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on three referring expression datasets: RefCOCO (UNC RefExp) , RefCOCO+ , and RefCOCOg (Google RefExp) .",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 70,
+                            "text": "RefCOCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 94,
+                            "text": "RefCOCO+",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 101,
+                            "end": 109,
+                            "text": "RefCOCOg",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also tested our approach on the scene recognition task using the datasets of , composed of two (Natural and Artificial scenes) datasets, each with 4 different classes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the 50 users' data sets, we selected 18 data sets for the evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For item j, we first obtain a collection of positive phrases about its feature k from R I j , i.e., R I j,k = o|(f ,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, we respectively use the two transalated GTA5 datasets to train DeepLab-v2 models, which are evaluated on the Cityscapes dataset for semantic segmentation.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 59,
+                            "text": "GTA5 datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 115,
+                            "end": 133,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(left) shows the first frame of sequence \"Cars10\" from the Hopkins 155 dataset  with 100 false trajectories of Brownian motion added to the original data (297 points).",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 78,
+                            "text": "Hopkins 155 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show the experimental results on the CRU dataset in Section 4 and conclude in Section 5.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 51,
+                            "text": "CRU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the following, we describe previous work in Section 2, discuss our risk-aware ranking approach in Section 3, test our approach on five TREC test collections in Section 4, and conclude in Section 5.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although performance on VQAv2 and CLEVR are approaching human levels on these benchmarks, our results show VQA is far from solved.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We formulate three objective functions which, in the offline setting, are optimized by the projections of the input dataset onto its principal subspace scaled by the eigenvalues of the output covariance matrix.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We give a formal definition of what constitutes a quilted web page (a web page that is stitched together out of textual patches taken from other web pages) and prove a few properties of that definition; we provide an algorithm for exhaustively detecting all quilted web pages in a given collection; we describe a data-parallel implementation of the algorithm that scales to very large collections; we evaluate the effectiveness of the algorithm in detecting web spam by applying it (with various parameterizations) to the half-billion page English-language subset of the ClueWeb09 collection  and judging samples of the detected quilted pages as to whether or not they indeed constitute spam; and we suggest a few heuristics to improve effectiveness.",
+                    "annotation_spans": [
+                        {
+                            "start": 571,
+                            "end": 591,
+                            "text": "ClueWeb09 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, it is usually expensive and time-consuming to collect a large scale of Web pages in lab-based environment and public-available collection",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Human3.6M dataset  contains human actions of 17 scenarios, including 3.6 million poses and corresponding images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "Human3.6M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We set k 1 and k 2 for each data set and label count to be the parameters which give the lowest average error rate for label propagation averaging over 100 trials and choosing from the set {5, 10, 50, 100}.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(3.1) requires the computation of all pairwise distances among the columns in X. For large data sets, this is ill-advised as it requires efforts of O(n 2 ) where n is the number of data samples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have shown this technique to be highly effective for compressing collections of related genomes  and large web collections , outperforming all other methods tested.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Capsule Network is a promising concept in deep learning, yet its true potential is not fully realized thus far, providing sub-par performance on several key benchmark datasets with complex data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This may indicate that in-class competitions undergo less quality control than the featured competitions (e.g., have smaller test sets), and that the quality control standards on Kaggle may change over time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that unlike partial label prediction where X is used as the input for both training and test sets, X is splitted into X obs for the training set and X unk for the test set, which implies",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The main properties of the ED and STS datasets are summarised in",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 46,
+                            "text": "ED and STS datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we show that the models trained with our proposed loss produces competitive retrieval accuracy results on benchmark datasets (CUB-200-2011 and CAR196).",
+                    "annotation_spans": [
+                        {
+                            "start": 139,
+                            "end": 151,
+                            "text": "CUB-200-2011",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 156,
+                            "end": 162,
+                            "text": "CAR196",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "C \u2286 R p , a data universe D, and a loss function : C \u00d7 D \u2192 R. A dataset D = {x 1 , x 2 \u00b7 \u00b7 \u00b7 , x n } \u2208 D n defines an empirical risk function:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is important to note that we have a graph dataset on AIDS drug data ( ).",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 70,
+                            "text": "AIDS drug data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, for the Web collections, the integration of the two sets yields substantial and statistically significant improvements of prediction quality over using either set alone.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 37,
+                            "text": "Web collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is seen that most of the aforementioned discussions made with GTA5 dataset can be carried over to SYNTHIA.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 77,
+                            "text": "GTA5 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It contains 43266 pairwise comparison data for quality assessment of 15 references from As there is no ground truth for the real-world dataset, we consider the results obtained by all observers as ground truth.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "that assigns weights to the source datasets according to their relatedness to the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar plots were obtained for the WT2g collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 51,
+                            "text": "WT2g collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the proposed FQN detectors, we perform a series of experiments on the COCO detection benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 106,
+                            "text": "COCO detection benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 3.1 provides error bounds on ML and KD-tree partitioning-based histogram density estimators constructed from N independent samples from a single joint posterior; modified bounds can be obtained for MCMC samples incorporating the mixing rate, but will not be considered here.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset consists of a 3-class problem, with the classes generated by the following equations: Where \u03b7 and \u03b5(t) are drawn from a standard normal distribution N(0,1), a is an integer drawn uniformly from the range",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on real-world datasets in Section 4, and then introduce related work in Section 5.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Datasets Related to Instructional Video Analysis: There are mainly three types of related datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform leave-one-person-out cross-validation on dataset 1 and leave-one-day-out cross-validation on dataset 2 and report the average the accuracies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast, the ClueWeb-B collection is stemmed using the Krovetz stemmer, which is a \"light\" stemmer, as it makes use of inflectional linguistic morphology  and is especially suitable for web collections where aggressive stemming can decrease precision at top ranks .",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 37,
+                            "text": "ClueWeb-B collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the OHSUMED collection is larger compared with the NIPS collection, so we varied the number of segment-topics from 50 to 150 in steps of 50 and word-topics from 150 to 490 in steps of 20.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 25,
+                            "text": "OHSUMED collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 54,
+                            "end": 69,
+                            "text": "NIPS collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To fairly compare with results provided in , we follow it to use the top left 1024 \u00d7 1024 image region in Harvard dataset and the whole image in CAVE dataset to perform the comparison on these datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 121,
+                            "text": "Harvard dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 145,
+                            "end": 157,
+                            "text": "CAVE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our computer vision application we used a silhouette image from the Mythological Creatures 2D database  .",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 106,
+                            "text": "Mythological Creatures 2D database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast, the other 36 cluster quality measures considered here are only constrained to be positive and the observed ranges of variation depend strongly both on the choices of \u03b4(S, T ) and \u2206(S) and on the datasets considered.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the PAPER corpus, NetPLSA takes about 5 \u223c 10s per iteration while ToP takes about 10 \u223c 30s per iteration on a daily workstation depending on the parameters.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 19,
+                            "text": "PAPER corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In total, the dataset contains videos of 476 hours, with 46,354 annotated segments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We reduced the dimensionality of visual features in the SUN dataset from 19,080 to 1,000 by random feature selection, which is preferable to PCA for preserving the variance among visual features.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 67,
+                            "text": "SUN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this purpose we use two well-known datasets: the 1M version of the MovieLens collection, which includes one million ratings (on a 1-5 scale) by 6,040 users for 3,900 items; and an extract from Last.fm provided by \u00d2. Celma , including the full listening history of 992 users up till May 2009.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 91,
+                            "text": "1M version of the MovieLens collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 197,
+                            "end": 204,
+                            "text": "Last.fm",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments conducted on Facebook and Twitter datasets exhibit the satisfying accuracy and encourage more advanced efforts on POI search.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 54,
+                            "text": "Facebook and Twitter datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Contextual similarity has a higher score on the validation set of YouCookII, but lower on testing set, suggesting it overfits to the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since this distribution is not obtainable, we approximate the expectation with the empirical average over the observed testing datasets {T l }.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compared the performance of our CE-COLOC algorithm and the naive approach described in Section 5 using synthetic datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The FACC1 dataset is the first publicly available web-scale collection of entity linked documents.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "FACC1 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", the UC Merced land use dataset  covers just 7 km 2 , and the ISPRS Vaihingen and Potsdam dataset  contains fewer than 36 km 2 of labeled data.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 98,
+                            "text": "ISPRS Vaihingen and Potsdam dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is number of training instances, d is the number of features per instance and nt is the number of instances in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast to the Network Intrusion Dataset, where there exist timepoints for which both algorithms achieve a pu- rity of 100%, in the Forest Cover Type dataset, no algorithm achieves 100% purity for many time points.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 44,
+                            "text": "Network Intrusion Dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 136,
+                            "end": 161,
+                            "text": "Forest Cover Type dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments on ad hoc search were conducted on the TREC Web Track queries and two ClueWeb corpora.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train our neural networks on a proprietary data set of CBCT images collected by the radiologists in our team and validate our method with extensive experiments and comparisons with the state-of-the-art methods, as well as comprehensive ablation studies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Ten binary UCI data sets were considered: (#1)sonar, (#2)breast cancer prognostic, (#3)breast cancer diagnostic,",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 51,
+                            "text": "sonar",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 57,
+                            "end": 81,
+                            "text": "breast cancer prognostic",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 87,
+                            "end": 111,
+                            "text": "breast cancer diagnostic",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In some datasets, the number of training samples for each class are fairly unbalanced.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Versioned document collections are collections that contain multiple versions of each document.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We split this dataset into 2 training and 2 test sequences with each object appearing in both splits and restrict our predictions to the frames in which the minimal distance between hand and object vertices is below 5mm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further compare our method with a single large DCGAN, AdaGAN, and MGAN on the Fashion-MNIST dataset  mixed with a very small portion of MNIST dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 102,
+                            "text": "Fashion-MNIST dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 139,
+                            "end": 152,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, our objective is the opposite: we wish to destroy the structure present in the original dataset and look for large changes in the result, indicative of significant cluster structure in the original dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The trajectory statistics for the dataset are reported in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset  is a recent video collection providing 3D hand annotations for a wide range of hand-object interactions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results reported here are consistent with tests we performed using over 100 images from the aforementioned Microsoft database, not reported here in detail for brevity.",
+                    "annotation_spans": [
+                        {
+                            "start": 111,
+                            "end": 129,
+                            "text": "Microsoft database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, from the MLB data set, \"white sox\" is more suitable   for identifying the class #whitesox than \"white\".",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 34,
+                            "text": "MLB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also test CRTER's performance under the same em-pirical setting with BM25 over different BM25 parameters on all the six collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since there is no labeled dense semantics for the re-ID datasets, we leverage the DensePose model trained on the COCO-DesenPose dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 135,
+                            "text": "COCO-DesenPose dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 50,
+                            "end": 64,
+                            "text": "re-ID datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method by learning to localize birds in the Caltech-UCSD Birds 200-2011 (CUB-2011) dataset  and exploiting attributes and segmentation masks as privileged information in addition to standard visual features.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 106,
+                            "text": "Caltech-UCSD Birds 200-2011 (CUB-2011) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then cluster both the original dataset and the m randomizations using the same clustering procedure and compare the results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further develop the model by using sparse Gaussian process techniques, which give improved performance and efficiency, and show competitive results against baseline methods when tested on the publicly available LETOR OHSUMED data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 214,
+                            "end": 236,
+                            "text": "LETOR OHSUMED data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Once search finished, models are constructed with different sizes to fit various tasks or datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted a series of 2, 000 simulations over this dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "set F in our problem and the collection C corresponds to review",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our 74 feature model trained on Twitter advertisement training dataset {x i , i } i",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 70,
+                            "text": "Twitter advertisement training dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "a random subset of 10,000 classes are selected from MS-Celeb-1M-base  face dataset which has 20,000 classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 82,
+                            "text": "MS-Celeb-1M-base  face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Participants were asked to make on-line submissions on the validation and test sets, with performance on the validation set being presented immediately to the participant and performance on the test set presented to the participants at the workshop.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Among all the six datasets we have tested, Youtube and Gplus datasets demonstrate least cohesive structures, i.e., they consist of more follower-followee relationships than the other datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 69,
+                            "text": "Youtube and Gplus datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This assessment helps us understand earlier temporal IR studies, while also suggesting novel ways to incorporate time effectively into retrieval.  : TREC topics and collections analyzed by the authors and their use in prior temporal retrieval studies.",
+                    "annotation_spans": [
+                        {
+                            "start": 149,
+                            "end": 176,
+                            "text": "TREC topics and collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental results show that our method outperforms state-of-the-art algorithms (iCaRL  and EEIL ) on two large datasets (ImageNet ILSVRC 2012 and MS-Celeb-1M) by a large margin.",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 148,
+                            "text": "ImageNet ILSVRC 2012",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 153,
+                            "end": 164,
+                            "text": "MS-Celeb-1M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose an adversarial defense method that achieves state-of-the-art performance among attack-agnostic adversarial defense methods while also maintaining robustness to input resolution, scale of adversarial perturbation, and scale of dataset size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply our ERACD on this preprocessed IMDB dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 52,
+                            "text": "IMDB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "statistically shows the percentage of node-pairs with the \"connectivity trait\" problem over all real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "YOLOv3 was trained on MS COCO, a dataset of bounding boxes corresponding to 80 different object classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 29,
+                            "text": "MS COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we introduce how to use our simulator to generate datasets exhibiting different open problems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To test RPMF's sensitivity to parameters: number of trees and height of trees, we conduct extensive experiments on MovieLens-100K dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 137,
+                            "text": "MovieLens-100K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Probability Ranking Principle (PRP) of information retrieval  implies that ranking documents in descending order by their probability of relevance produces optimal performance under a \"reasonable\" assumption, i.e. the relevance of a document is independent of other documents in the collection .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this subtask, we use 300-VW  dataset of continuous interview-style videos of 114 people taken in-the-wild as a source of training data.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 40,
+                            "text": "300-VW  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The topics involving market and trade are also present in the background corpus, so their weights are reduced through contrast.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since partitions are built by merging dataset projections of contiguous single items, we can reformulate the problem as follows: given an item k \u2208 I, we have to find the Downloaded 03/24/20 to 82.173.143.206.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the case of SRAA dataset we inferred 8 topics on the training data and labeled these 8 topics for all the three classification tasks discussed above.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 27,
+                            "text": "SRAA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Large-scale synthetic data can be used to pre-train models in the absence of suitable real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section we consider an algorithm that clusters the data set and replaces the \u03a8 function with a normalized cut value.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Observation 1: The one-size-fits-all mean estimator can be a good approximation to all DTW_Ratios from a single dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ELSA was evaluated with the New York Times corpus for fifteen famous locations.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 49,
+                            "text": "New York Times corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we evaluate the quality of generators trained on two datasets:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can see that in the largest synthetic dataset, DC-QUIC is more than 10 times faster than QUIC, and thus also faster than Glasso and ALM.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparing the Dunn and Davies-Bouldin indices across all of the datasets considered here, it appears that the Dunn indices generally yield better results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested SugarCube on the Blog06 collection .",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 44,
+                            "text": "Blog06 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this purpose, we use an external multi-class labeled dataset which we refer to as the reference dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SARCOS dataset concerns an inverse dynamics problem of a 7-degrees-of-freedom anthropomorphic robot arm .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "SARCOS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows an example of the mining of coevolving spatial event sets with a small spatio-temporal dataset related to two time slots t 0 and t 1 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We decided not to use the validation set to pick features in a more fine-grained fashion (e.g. how many principal components, what kernel parameters) since we expect the optimal settings to vary for each test query, and relying on RankBoost's inherent feature selection ability is a more efficient and effective solution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The inconsistent feature distributions influence the attribute prediction on the re-id dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We illustrate the differences of median and mean type estimator on a synthetic dataset with the task of estimating a curve on the sphere, that is M =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this subsection, we use two time series data sets to test the efficacy of our methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all methods (except constant) we empirically tune the parameters using one of the 5 folds as the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that our model is able to bypass the degenerated sequence problem usually encountered in previous work and outperform the current state-of-the-art across three evaluation metrics in two challenging datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Interestingly, the number favored by DWCV coincides with the number of datasets we mix, even though, as our experiments above show, the ideal domain boundaries do not coincide with the dataset boundaries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The precision plot with increasing value of k for the Genetics data set is illustrated in .",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 71,
+                            "text": "Genetics data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The CIFAR-10 dataset consists of 60,000 real world tiny images (32\u00d732 pixels), which can be divided into 10 categories and 6,000 images for each category.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A new dataset based on synthetic pop-out search arrays is proposed to compare deep and classical saliency models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first main conclusion is that including global aggregate features helps, for example improving the average classification rate on the MSRC dataset from 67.1% to 74.4% for the spatially uncoupled 'IND' model and from 80.7% to 84.9% for the 'CRF\u03c3' spatial model.",
+                    "annotation_spans": [
+                        {
+                            "start": 138,
+                            "end": 150,
+                            "text": "MSRC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments on real world datasets derived from Freebase demonstrate the effectiveness and efficiency of our approach.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use 70% of the relevant feedback in each dataset for training.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Digit Classification: For hand-written digit classification, we report our results on an imbalanced split of MNIST dataset in .",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 122,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our simulator is based on ground-truth causal relations regarding the domain knowledge, and its parameters are estimated with a real-world dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The property of being closed is, in fact, a global property of an itemset in the context of the whole collection of frequent itemsets of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that, a standard dataset, e.g., TREC, is not applicable because queries are not time-related, and judgments are not targeted towards temporal information needs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1) Food-101N consists of 310k/25k train/test images, with 101 food categories of the original Food-101 data set .",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 112,
+                            "text": "Food-101 data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 4,
+                            "end": 13,
+                            "text": "Food-101N",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In each HIT(one assignment for each worker), 750 downloaded images are included for annotation, and an additional 30 positive samples and 30 negative samples with ground truth from the SUN database are also randomly injected as control.",
+                    "annotation_spans": [
+                        {
+                            "start": 185,
+                            "end": 197,
+                            "text": "SUN database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In general, such models employ a set of submodels (GP experts), each of which is trained on a subset of the full training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the validation sets to empirically tune the parameters of all methods, afterward the experiments are run 5 times using the tuned parameters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate the effectiveness and generalization ability of the flow-guided video inpainting approach, we evaluate our method on DAVIS  and YouTube-VOS  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 132,
+                            "end": 164,
+                            "text": "DAVIS  and YouTube-VOS  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then the dataset was binarized so that a (person, movie) entry had value 1 if the person gave the movie a rating above 3 stars (from a possible 0-5 stars).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let R be a pre-defined number of latent topics, fine-tuned over the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "summarizes some of the most important statistics about the sentiment spikes of our collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Datasets We train our models on 213 487 images randomly selected from the Open Images Train dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 99,
+                            "text": "Open Images Train dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the 5 smallest datasets, we used a mini-batch size of 10 and 4 Monte-Carlo samples during training.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They are concise since a collection of closed itemsets is orders of magnitude smaller than the corresponding collection of frequents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, the network pre-trained on source dataset is not likely to be discriminative enough when it is applied to the target dataset directly.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results of the comparisons on Market-1501 dataset are presented on  and the results on DukeMTMC-reID dataset are shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 53,
+                            "text": "Market-1501 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 91,
+                            "end": 112,
+                            "text": "DukeMTMC-reID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since we do not have access to the Google Sets algorithm it was impossible for us to run their method on our datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This approach effectively assumes that the topics were sampled from some population of (possible or actual) topics, but that the document collection is fixed in stone for all time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All datasets used in our experiments were manually labeled by three to five human annotators.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are many classic gait databases such as SOTON Large dataset , USF , CASIA-B , OU-ISIR , TUM GAID  and etc.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 65,
+                            "text": "SOTON Large dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 68,
+                            "end": 71,
+                            "text": "USF",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 74,
+                            "end": 81,
+                            "text": "CASIA-B",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 84,
+                            "end": 91,
+                            "text": "OU-ISIR",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 94,
+                            "end": 102,
+                            "text": "TUM GAID",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all datasets, we performed 10-fold cross-validation and report average results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate the effectiveness of IDDTW, we tested our algorithm on two datasets with varying properties.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For language models of documents from a reasonable large dataset, ci is generally much smaller than\u0109.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The FDA-Net achieves superior performance over the state-of-the-art approaches on all the evaluated vehicle ReID datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 11,
+                            "text": "FDA-Net",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 108,
+                            "end": 121,
+                            "text": "ReID datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Pathophysiological diagnosis refers to the original cause of symptoms where : Diagnostic records and dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since it is difficult to obtain the groundtruth cluster structure for real applications, we first verify the effectiveness of the proposed approach in obtaining the cluster structures of the proposed approach on simulated dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let the nt \u00d7 1 vector ft contain the scores transferred to the target corpus documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "; (2) train the clustering model on the labeled set, using the feature representation derived from the initial model;  apply the clustering model to group unlabeled data with various amounts , and thus attach to them \"pseudo-labels\"; and (4) train the final recognition model using the whole dataset, with both original labeled data and the others with assigned pseudo-labels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the HME implementation on the ARPA 5,OOO-word Wall Street Journal corpus (WSJl, H2 dev set).",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 100,
+                            "text": "ARPA 5,OOO-word Wall Street Journal corpus (WSJl, H2 dev set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can observe that both Mpu and M3L can scale well to large-scale datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each dataset contains 1, 282 ground truth images in seven query categories, including colosseum, eiffel tower, golden, torre pendente di pisa, starbucks, tower bridge, and triomphe.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the cropped image is large enough, the pair containing the cropped scene and the corresponding product is included in the CTL dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 125,
+                            "end": 136,
+                            "text": "CTL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to deep belief nets, the leading MKMs obtain slightly lower error rates on one data set and slightly higher error rates on another.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Does the dataset under consideration actually exhibit a natural cluster structure?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "PCA on the one hand revealed the presence of redundancy but, on the other hand for what concerns our goals, failed in isolating the correct descriptors, probably due to the small size of our dataset (70 recordings and 24 features).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Adult income dataset, also with protected gender attribute, our model achieves equality of opportunity by, among others, obfuscating the wife and husband relationship.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 27,
+                            "text": "Adult income dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both datasets, our SpherPHD method outperforms other methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It can be seen ) that the random ensembles are usually clearly worse than the full ensemble, while the greedy ensemble provides a clear improvement, and also performs better than the full ensemble (the PenDigits data set is fairly easy, a much bigger improvement was not to be expected).",
+                    "annotation_spans": [
+                        {
+                            "start": 202,
+                            "end": 220,
+                            "text": "PenDigits data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Versioned document collections can become very large, due to the need to retain past versions, but there is also a lot of redundancy between versions that can be exploited.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows three examples from our in-the-wild dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This procedure can be used to assess the adequacy of existing and future gene expression time-course data sets for determ ining transcriptional regulatory relationships such as coregulation .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To this end, for the Forest Cover Type dataset we measure the average purity achieved by both algorithms.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 46,
+                            "text": "Forest Cover Type dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In a set of experiments performed over several TREC test collections, we compare the proposed models against existing SR models and show that using an importance prior within a LM framework delivers retrieval performance that significantly outperforms the current state of the art.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 68,
+                            "text": "TREC test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Background images are sampled from both the LSUN  and ImageNet  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 72,
+                            "text": "LSUN  and ImageNet  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For a dataset satisfying strict separation, let SL(d) be the tree returned by Single-Linkage.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Ground truth information about the extent of lakes was obtained via the Shuttle Radar Topography Mission's (SRTM) Water Body Dataset (SWBD), which provides a mapping of all water bodies for a large fraction of the Earth (60 o S to 60 o N) for a short duration of 11 days around Feb 18, 2000 (the closest date at MODIS scale).",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 132,
+                            "text": "Shuttle Radar Topography Mission's (SRTM) Water Body Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, all of these solutions result in a mere increase of the number of documents to be judged and therefore in an increase of the cost of the test collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The best compared method (separately for each dataset) had a median of 4.8% reprojection error worse than our method and required an additional median runtime of 74% compared to our method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the experiment designer selects a collection of experiments S = {S 1 , S 2 , . . . }.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In two recent papers, by Cormack and Lynam  and Robertson , it has been suggested that in Cranfield-style test-collection-based IR evaluations, we should in general consider the document collection used as a sample from a population (in addition to considering the set of topics or queries thus).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "His research interests lay in the intersection of retrieval, mining, and data management aspects of temporal Web collections like Web archives, Wikipedia and news collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 144,
+                            "end": 174,
+                            "text": "Wikipedia and news collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "re-id datasets and demonstrated competitive performance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this case regularization was performed by setting aside 1/4 of the data as a test dataset, and choosing a cutoff on the eigenvalues of the input covariances matrix that would give the maximal information value on the test dataset .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results on the Enron dataset, which showed that our method improved search efficiency.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 38,
+                            "text": "Enron dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We again used 5-fold cross-validation for subtopic retrieval on the TREC subtopic collection to optimize the parameters, and the results are shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 92,
+                            "text": "TREC subtopic collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "More recently, with the increasing computing capability of modern GPUs and the availability of large-scale video datasets, 3D ConvNet (C3D) has drawn more and more attention.",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 133,
+                            "text": "3D ConvNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This includes typical saliency benchmark results for both fixation prediction and object segmentation based on MISS.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further observe that the absolute scores on the AOL dataset are lower overall which is mainly due to the disjointness nature of the query logs that we use to mine the aspects with the queries in the AOL logs.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 62,
+                            "text": "AOL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Webscope dataset (221367 rating events)",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 16,
+                            "text": "Webscope dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The performance of LB13 , GC14 , LB14 , WS16 , ZN18 , YG18 , and our proposed method on the 150 images in the controlled category of our dataset for the five error metrics is recorded in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the performance of supervised and semi-supervised training algorithms on these two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast,  shows the results obtained for this structureless dataset from the Davies-Bouldin index based on the same intercluster distance (complete linkage, \u03b4 2 (S, T ))",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In previous work, the relationship between the variance of parameters and collection statistics such as IDF (inverse document frequency)  has been studied.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Ablation study: To analyze the effects of multi-sensor fusion and multi-task learning, we conduct an ablation study on KITTI training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 137,
+                            "text": "KITTI training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the same fixed \u03b3 = 1 for our SIFT descriptor in all datasets: Scene-15, Caltech-101 and Caltech-256, although a more careful selection is likely to further improve performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 77,
+                            "text": "Scene-15",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 79,
+                            "end": 90,
+                            "text": "Caltech-101",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 95,
+                            "end": 106,
+                            "text": "Caltech-256",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We finalized a dataset as topic terms 1 airport terminal travel airlines delta gate tsa high mile gogo gogoinflight united southwest wifi baggage continental airplane handler airways 2 san technology apple office",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two main resources available for query recommendation are the document collection (including anchor logs)  and search logs , which can also be used as forms of implicit or explicit feedback to re-rank retrieved documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following , we split the MSCOCO dataset, with 113,287 images for training, 5,000 images for validation, and the remaining 5,000 images for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 39,
+                            "text": "MSCOCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SVHN is a 10-way house number classification dataset, with 73257 training images and 26032 test images.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 4,
+                            "text": "SVHN",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, it can be used for rapidly developing efficient parallel data mining applications that operate on large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use DBLP dataset and generate lists of 2-grams for strings.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 19,
+                            "text": "DBLP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report extensive experiments on PASCAL VOC 2007 , PASCAL VOC 2012  and MS COCO  datasets, to demonstrate that our ScratchDet performs better than some pretrained based detectors and all the state-of-the-art trainfrom-scratch detectors, e.g., improving the",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 91,
+                            "text": "PASCAL VOC 2007 , PASCAL VOC 2012  and MS COCO  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct our experiments on the dSprites dataset , which consists of all possible images of an object (square, circle or heart) varied by scale, position, rotation with labeled latents.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 50,
+                            "text": "dSprites dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Simulated interactive query expansion experiments on TREC Ad hoc and Legal track datasets not only confirmed this hypothesis, but also showed that automatically predicted P(t | R) probabilities (the complement of term mismatch) can accurately guide expansion to the terms that need expansion most, and lead to better retrieval than when expanding rare terms first.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 89,
+                            "text": "TREC Ad hoc and Legal track datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The optimal parameters for the final GBRT model are picked using cross validation for each data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, each domain has a different aspect such as resolution, facial expression, angle of light, etc, and one needs to know the intrinsic manifold structure of the image data set to translate between the domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Yale face database and the Extended Yale face database  together consist of pictures of 38 subjects with 9 different poses and 45 different lighting conditions.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 22,
+                            "text": "Yale face database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 31,
+                            "end": 58,
+                            "text": "Extended Yale face database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The empirical analysis of the proposed dependency detection methods was performed on the telecommunications alarm log data and on two synthetic datasets that were especially created to test the dependency detection algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Language models for example use the relative frequency of words in the document and the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "5.2, we derive a model by using our collected data (Sec. 3) for pre-training, and fine-tune with the concerned dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, within just a few years, the top-5 image classification accuracy on the 1000-class ImageNet dataset has increased from \u223c84%  to \u223c95%  using deeper networks with rather small receptive fields .",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 113,
+                            "text": "1000-class ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We averaged the results of 10 datasets and report them in this paper.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset consists of 12,887 QA pairs for training, 1,000 pairs for validation, and two tests sets containing 1,800 pairs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the image quality of the Messidor dataset is close to that of IDRID, even the pretrained lesion based model can obtain a substantial gain compared with the basic holistic classification model.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 47,
+                            "text": "Messidor dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the recently released Flickr-based Diversity in Faces (DiF) dataset  and the UTKFace dataset  as another source of darker-skinned faces.",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 99,
+                            "text": "UTKFace dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 29,
+                            "end": 74,
+                            "text": "Flickr-based Diversity in Faces (DiF) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For D2011 dataset, we obtained tweet feeds for Mar 2011 using the Twitter Feeds API 13 .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "D2011 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments on the standard CLEF CLIR collections for three language pairs of varying degrees of language similarity (English-Dutch/Italian/Finnish) demonstrate the usefulness of the proposed fully unsupervised approach.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 53,
+                            "text": "CLEF CLIR collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use a Mask R-CNN (implementation ) model pre-trained on the MS COCO dataset and re-train the head layers for each tool.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 78,
+                            "text": "MS COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Genuine signatures must have between 80% and 120% of the strokes of the first signature signed and, if readable, be of the same name as that typed into the data collection system.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By using Benchmark Corpus  to train the thresholds, we got their optimal values as: \uf071 equals to 0.02 and \uf071 \uf0a2 equals to 0.015.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 25,
+                            "text": "Benchmark Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results suggest that the modified model significantly outperforms the original log-logistic model, in all collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed architecture works well with the MNIST  dataset, nonetheless the performance on datasets with more complex objects such as CIFAR10  is not on par with the CNNs, due to the nature of complex shapes in CIFAR10 in comparison to MNIST.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 60,
+                            "text": "MNIST  dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 136,
+                            "end": 143,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 213,
+                            "end": 220,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 238,
+                            "end": 243,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Alice in Wonderland (AIW) dataset was preprocessed to remove all characters but letters and spaces, shift all letters from upper to lower case, and split along sentence dividers to yield a 27character alphabet (a-z and space).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 37,
+                            "text": "Alice in Wonderland (AIW) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our evaluation uses exclusively public datasets and our source code is released to the public as part of SPLATT, an open source high-performance tensor factorization toolkit. .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, due to their difficulty in modeling the inter-list loss and inefficiency on large scale datasets, list-wise CF approaches are not widely used compared to pair-wise in ranking-oriented CF.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Datasets We focus primarily on IMAGENET, the largest and most diverse image dataset commonly used to evaluate GANs.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 39,
+                            "text": "IMAGENET",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, it is difficult to collect such datasets with known ground-truth because underlying real-world causal relations are usually highly complex.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our implementations of stochastic algorithms, in order to mimic online learning scenarios, each dataset is randomly permuted before feeding to the solvers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our datasets include the textual features associated with 2,758,992 LastFM artists and with more than 9 million YouTube videos.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Overall, our soft ordinal labels improve the baseline by approximately 2% in class accuracy and reduce MAE by roughly 0.02 units in the Image Aesthetics dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 136,
+                            "end": 160,
+                            "text": "Image Aesthetics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test our results on the Million Song Year Prediction Dataset (MSD) (Bertin-Mahieux et al., 2011) (n = 515344, p = 90) and the New York flights dataset",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 63,
+                            "text": "Million Song Year Prediction Dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 129,
+                            "end": 153,
+                            "text": "New York flights dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, how can one evaluate whether the graph datasets are similar and related?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "every record in the dataset has a time stamp) and users with too few records for learning their curiosity distributions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "PASCAL-VOC is a smaller dataset which contains 13K images from 20 classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 10,
+                            "text": "PASCAL-VOC",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence, we conjecture that accuracy and marginal likelihood can be further improved by using a bigger dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "CineMatch, Netflix's recommender, achieves an RMSE score of 0.9514 on the Netflix dataset, which is outperformed by MudRecS.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 89,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then describe heuristic optimization algorithms for this problem that can scale to large document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, TraPHic considers both heterogeneous-based and horizonbased interactions, and thus produces superior performance on our dense and heterogeneous dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SpatialMultiOmniglot is a dataset of paired images x and y, where x is an image of size (32m, 32n) comprised of mn Omniglot character arranged in a (m, n) grid from different Omniglot alphabets, as illustrated in .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 20,
+                            "text": "SpatialMultiOmniglot",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2 1 (TDT2) is a collection of English news articles from various sources such as NYT, CNN, and VOA in 1998.",
+                    "annotation_spans": [
+                        {
+                            "start": 5,
+                            "end": 9,
+                            "text": "TDT2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "illustrates the attention weights over four hops for a random user from the Pinterest dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 93,
+                            "text": "Pinterest dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to provide a comprehensive evaluation, we compare our method with state-of-the-art methods on several datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Empirical studies on two real world data sets are also illustrated.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the table, we can observe that quotations are common in Web collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Attendees should come with a speci c current need for data, and/or details on their in-progress collection building e ort.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NYU Depth v2 dataset  contains 1449 pairs of RGB and depth images, where 795 pairs for training and 654 pairs for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 24,
+                            "text": "NYU Depth v2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments reveal that our online cvxMF runs four times faster than its non-online counterpart on datasets with 10 4 samples, while for larger sample sets cvxMF becomes exponentially harder to execute.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This can be seen from results on HTTP and Ann-thyroid datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 62,
+                            "text": "HTTP and Ann-thyroid datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The authors first study a heuristic filtering procedures to clean the photo collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 86,
+                            "text": "photo collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "popular across the board, with a host of datasets being constructed  and numerous models being proposed .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Formally, Function Richness: Richness requires that by modifying the distance function, any partition of the underlying data set can be obtained.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the overlap percentage of top frequency words/characters between corpora and \"Web 1T 5-gram Corpus\" or \"Chinese Web 5gram\".",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 104,
+                            "text": "Web 1T 5-gram Corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 110,
+                            "end": 127,
+                            "text": "Chinese Web 5gram",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although lots of models have been proposed for recommending query refinement terms, most have exploited the document collection or previous search logs but not both.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We split our dataset into 60 training videos and 32 test videos.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These parameters are learned using a validation set, which will be the union of a set of complete-able (positive) triplets \u00a7 and a set of pairs which cannot be completed\u00a8(negative); that is \u00a4 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A further observation about this data set is that there is a lot of noise in the behavior of the distance functions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Observe that the BaiduQA dataset is much more sparser with a larger vocabulary and much shorter documents compared to the Snippet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 32,
+                            "text": "BaiduQA dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 137,
+                            "text": "Snippet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we downsample the questions based on their type to control the dataset type composition, and filter out redundant questions that are too semantically similar to existing ones.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Upper-Bound for both datasets is therefore likely a slight overestimate, as it uses half of these for training before before testing on all.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "BH-Free and BH-Forced were shown to achieve a competitive prediction accuracy on the Movielens and Netflix data sets, with respect to co-clustering competitors.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 116,
+                            "text": "Movielens and Netflix data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "m\u00b7j and m be the respective counts in the background corpus B. Statistical Model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have evaluated our CMRIN on three commonly used benchmark datasets for grounding referring expressions (i.e., RefCOCO , RefCOCO+  and Ref-COCOg ).",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 120,
+                            "text": "RefCOCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 123,
+                            "end": 130,
+                            "text": "RefCOCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 137,
+                            "end": 146,
+                            "text": "Ref-COCOg",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To comply with  we slightly diverge from these models for the ImageNet and MPII Composites dataset by using a sum formulation instead of the probabilistic expression, i.e. for attributes p(z n |x) \u221d .",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 98,
+                            "text": "ImageNet and MPII Composites dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Differently from other algorithms, DCI CLOSED exploits duplicate checking just looking at a subset of the original dataset stored in a vertical bitwise format.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, some feature selection methods can lead to more than 10% relative improvement over WAF for the .gov dataset, while most feature selection methods can only result in 1~2% or even less improvement for the OHSUMED dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 120,
+                            "text": ".gov dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 216,
+                            "end": 231,
+                            "text": "OHSUMED dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "when region number k of image i corresponds to a \"foreground object\" (visible in large part and from a category that occurs multiple times in the image collection), and 0 otherwise",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We introduce proximal graphical event models (PGEMs) as a representation of such datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experiment with the TREC datasets of the Web 2009 (queries 1-50) and",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 36,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Static term features that are dependent on the target collection and background corpora are more important for selecting effective query terms than dynamic features derived from the stream of subtitles.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We outperform state-of-the-art methods on dense traffic datasets by 30%.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, it is notoriously hard to scale them to large datasets due to space and runtime complexity (typically O(n 2 ) and O(n 3 ), respectively, for most problems) ].",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One simple method is to test how the covariance structure of the perturbed data set matched with the original data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the size of the CAL500 data set is relatively small, we develop another test collection called TSII.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 31,
+                            "text": "CAL500 data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 95,
+                            "end": 99,
+                            "text": "TSII",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are 1,464 images in the training set and 1,449 in the validation set, and the left 1,456 images are for testing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, most GAN-based methods failed to provide performance improvement on the normal datasets (IF of 1 or 'Normal'), but our method even outperforms classifier on the MNIST and SVHN datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 171,
+                            "end": 194,
+                            "text": "MNIST and SVHN datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "; the data corpus was a 100GB sub-sample of the Web.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this way we use only data that is provided as part of the TREC collection, and therefore achieve the fairest and most reproducible possible comparison between evaluation measures.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 76,
+                            "text": "TREC collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They subsequently employ their sky model for estimating outdoor lighting from outdoor image collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply the proposed algorithm to the Caltech-UCSD Birds 200-2011 dataset, and obtain encouraging results suggesting further investigation into the benefit of privileged information in structured prediction.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 74,
+                            "text": "Caltech-UCSD Birds 200-2011 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset consists of ten instances of each class, and the classifier was evaluated using the \"leave one out\" strategy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then use convex optimization to learn the optimal linear combination of these kernel matrices, producing a single \"combined\" kernel which can be used by a support vector machine (SVM) to rank order test set songs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the ROC curves for the IJB-C dataset and the precision-recall curves for a image retrieval task on ImageNet-100.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 42,
+                            "text": "IJB-C dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 117,
+                            "text": "ImageNet-100",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We estimate the Conditional Probability Distribution (CPD) of each variable given its parents in the causal graph with the real dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that on more difficult datasets, e.g., sim-3, the Stream phase can accumulate errors along the way, as previously noted by [1]running intermittent Offline phases can correct these errors and improve the overall performance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Ian has built test collections for search, ltering, novelty, web, social media, intranet access and other domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These methods have investigated incorporating knowledge bases and retrieval methods into VQA datasets with a set of associated facts for each question.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 101,
+                            "text": "VQA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We re-used directly the hyper-parameters we have used to fit the Flowers dataset without further tuning and obtained, as preliminary results, a reasonable accuracy of 0.856 and an IoU of 0.691.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 80,
+                            "text": "Flowers dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As depicted in , and , we can clearly see that L1-MAX and L2-MAX achieve the best robustness on Amazon and Epinions datasets, while in Bookcrossing dataset L1-AVG and L2-AVG achieve the best performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 155,
+                            "text": "Bookcrossing dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 124,
+                            "text": "Amazon and Epinions datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly, there might be also a validation set V used for tuning parameters and \"learning\" recommendation functions (see Section 5.2).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We hope that the GQA dataset will provide fertile ground for the development of novel methods that push the boundaries of question answering and visual reasoning.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 28,
+                            "text": "GQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The correct choice of latent representation size d z in Eq. 9, in conjunction with a given dataset size can allow for quicker convergence and obtaining an estimate of the Wasserstein distance for the latent representation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "-I dataset under different parameter settings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The document collection is a crawl of the .gov domain.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is accomplished by training T () using a large dataset of images electronically-displayed and then cameracaptured using several combinations of cameras and displays.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A huge range of techniques, both parametric and non-parametric, exist for inferring body pose from 2D images and 3D datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data sets after preprocessing are summarized in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A comparison between datasets point out interesting differences: for Ohsumed, ODP with Title & Description and the Wikipedia dataset the overlap drops significantly with increasing depth while it decreases rather slowly for the ODP HTML dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 132,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 228,
+                            "end": 244,
+                            "text": "ODP HTML dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the same experimental protocols used by previous studies, we trained and tested the proposed DuRN-P using the training and test subsets (300 and 200 grayscale images) of the BSD-grayscale dataset.  .",
+                    "annotation_spans": [
+                        {
+                            "start": 184,
+                            "end": 205,
+                            "text": "BSD-grayscale dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Sintel dataset (figure 1) consists of a thousand pairs of frames from a highly realistic computer graphics film with a wide variety of locations and motion types.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "Sintel dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": The comparison of the number of closed association rules and the total number of rules discovered in the address dataset, using the three different methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "80% of each dataset is used for training, 10% for validation and 10% for testing respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In recent years, a number of datasets for instructional video analysis  have been collected in the community.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the SuperGlue benchmark , this procedure accounts for a 2.7 improvement in aggregate benchmark score using the same architecture as previous state-of-the-art submissions.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 26,
+                            "text": "SuperGlue benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate the principal suitability of LS-DCUR for processing very large, dense matrices, we consider a data set of 80 million Google TM images .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We adapt standard labeling approaches, namely Maximum Term Frequency, Jensen-Shannon Divergence, \u03c7 2 Test, and Information Gain, to take use of those relationships and evaluate their impact on 4 different datasets, namely the Open Directory Project, Wikipedia, TREC Ohsumed and the CLEF IP European Patent dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 226,
+                            "end": 313,
+                            "text": "Open Directory Project, Wikipedia, TREC Ohsumed and the CLEF IP European Patent dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Lytro dataset is commonly used in view synthesis research, but the baseline of each capture is limited by the small diameter of Lytro Illum's lens aperture.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "Lytro dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Despite various efforts to use generic image features  or learn them , these hand-designed gradients are still widely used after a decade and support top-ranking algorithms on the Berkeley benchmarks .",
+                    "annotation_spans": [
+                        {
+                            "start": 180,
+                            "end": 199,
+                            "text": "Berkeley benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present the ChatNoir search engine which indexes the entire English part of the ClueWeb09 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 99,
+                            "text": "ClueWeb09 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The eventual test collection should serve as a benchmark that can be used to compare the performance of various techniques proposed for ILIR.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, we empirically evaluated the effect of our proposed method on two broadly used 2D images benchmark datasets, i.e., MNIST and CIFAR10.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 129,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 134,
+                            "end": 141,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We sample 1,000 images from the ImageNet validation set and optimize the adversarial loss L for each of them using Adam  with learning rate 0.005 for a maximum of 400 steps to construct the adversarial images.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 55,
+                            "text": "ImageNet validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the CiteSeer dataset, which consists of complete object information (e.g. metadata, contents, and citations), we generate the direct influence degree of one article over the other by utilizing their contents (including metadata) and citations.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "CiteSeer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct A/B-test among the three models in this user study, where the participants are divided into 6 groups by three models and two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Meanwhile, we achieve the state-of-the-art or competitive results on the three related datasets, NYUD-v2, SUN-RGBD and KITTI.",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 104,
+                            "text": "NYUD-v2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 106,
+                            "end": 114,
+                            "text": "SUN-RGBD",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 124,
+                            "text": "KITTI",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": F-measure evaluation on the BSDS500 benchmark , comparing to gPb on grayscale and color images, both for local contour detection as well as for global detection",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 47,
+                            "text": "BSDS500 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The original DBLP dataset does not contain citation relations.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 25,
+                            "text": "DBLP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on two large collections of Amazon and Yelp reviews confirmed the effectiveness of our solution in both recommendation and explanation tasks, compared with several existing recommendation algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ImageNet-1k is a large-scale dataset with 1,000 different classes, consisting of approximately 1.3 million training images and 50,000 validation images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "ImageNet-1k",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our main result is that on an English to French translation task from the WMT-14 dataset, the translations produced by the LSTM achieve a BLEU score of 34.8 on the entire test set, where the LSTM's BLEU score was penalized on out-of-vocabulary words.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 88,
+                            "text": "WMT-14 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The evaluation of a relative validity index is usually performed as follows : (i) several partitions are generated with different clustering algorithms; (ii) for each clustering algorithm the ability of the new measure to identify the correct number of clusters, as defined by the ground truth partition of each dataset, is verified.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To obtain p(t|ex), we take the sum over documents d in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Yet, how to effectively cluster, especially on a large-scale (i.e. million-level or above) dataset, remains an open question.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Several challenge datasets included a very large number of features (up to 100,000) and only a few hundred examples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, the document collection approach might give too many unsuitable suggestions for current queries that have a very high document frequency leading to a statistical relationship with many of the terms in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For large document collections, the data stored in inverted indexes requires considerable amounts of space.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that the training of deep distance metric learning methods using the proposed upper-bound is substantially faster than triplet-based methods, while producing competitive retrieval accuracy results on benchmark datasets (CUB-200-2011 and CAR196).",
+                    "annotation_spans": [
+                        {
+                            "start": 228,
+                            "end": 241,
+                            "text": "CUB-200-2011 ",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 245,
+                            "end": 251,
+                            "text": "CAR196",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show results for seen test sets in supplemental material.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When two scholars have the same name spellings, their citation data are mistakenly merged into a single collection, leading to an incorrect citation analysis results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", reporting 65% improvements over BM25 measured by nxCG(10) on INEX IEEE collection with a different task where overlap is allowed.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 83,
+                            "text": "INEX IEEE collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows our results in comparison with the existing capsule network results and state-of-the-art results for the corresponding datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second part is used to compute the likelihoods of the contained words: We show the predictive probabilities as a function of effective passes through the data set in  for the New York Times, Arxiv, and Wikipedia corpus, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 206,
+                            "end": 222,
+                            "text": "Wikipedia corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 179,
+                            "end": 193,
+                            "text": "New York Times",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 195,
+                            "end": 200,
+                            "text": "Arxiv",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The document collection on an Intranet is relatively small and changes less frequently.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our evaluation is done on the ClueWeb09 Category B retrieval collection 1 , which contains roughly 50 million web pages in English.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 73,
+                            "text": "ClueWeb09 Category B retrieval collection 1",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "we define a novel querysimilarity subnetwork (QSimNet) to learn a query-guided re-identification score; v. we achieve a new state-of-the-art performance on CUHK-SYSU  and PRW  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 53,
+                            "text": "QSimNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 156,
+                            "end": 184,
+                            "text": "CUHK-SYSU  and PRW  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this experiment, we used the Movielens dataset again and chose 5 classes Mystery, Drama, IMAX, Sci-Fi, and Horror, and selected those elements who belonged to at most two categories and at least one category (i.e., \u2206 = 2).",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 49,
+                            "text": "Movielens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The aset400 data set presents a challenge not present in the synthetic data: It has a moderate amount of conflicting triplets, thus methods used in the evaluation must deal with noise within the data.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "aset400 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NUS dataset, on the other hand, is based on Flickr user-provided tags which yield a 1,000 dimension text space and in average there are 7.73 words (tags) per image.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "NUS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Based on ApolloCar3D, we aim to develop strong baseline algorithms to facilitate benchmarking and future research.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, modeling the dataset as a single domain would necessarily blend the distinctions, potentially damaging visual discrimination.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is different from our approach in that we summarise data, instead of pattern collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To predict the performance of the runs in each track, we trained the Lasso regressor over the queries in the other two tracks that use the same collection; e.g., to predict the performance for TREC7, we trained over the TREC8 and TREC12 queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report the performance of SeqGROUND on the Flickr30K Entities dataset, and compare it with the stateof-the-art methods 3 in .",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 72,
+                            "text": "Flickr30K Entities dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results show that on 5 datasets DC-Pred++ achieves better prediction accuracy using the same prediction time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use two datasets: D2010 and D2011.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 26,
+                            "text": "D2010",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 31,
+                            "end": 36,
+                            "text": "D2011",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Typically, the amount of available data in such an organization (e.g., a university, a research institute, or a research lab) is limited when compared to the W3C collection that has mostly been used for the experimental evaluation of expertise retrieval so far.",
+                    "annotation_spans": [
+                        {
+                            "start": 158,
+                            "end": 172,
+                            "text": "W3C collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To show the effectiveness of our approach through comprehensive and fair evaluations, we compare our RF-Net with other methods with three evaluation protocols in two public datasets, HPatches  and EF Dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 183,
+                            "end": 207,
+                            "text": "HPatches  and EF Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the results on both data sets reveal an interesting trend: the multilayer arc-cosine kernels often perform better than their single-layer counterparts.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, if the documents matching w do not share a common topic, then this set of documents is analogous to a random sample from the document collection, with similar distributions of word occurrences.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast, the largest percentage of unique relevant documents contributed by a single participant in the MB2012 collection is just 20%.",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 125,
+                            "text": "MB2012 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the evaluation on natural image data we follow the example of  and take 400 gray scale images with 180 \u00d7 180 pixels as our training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focus our results on the relatively new UCF Sports Action dataset, consisting of non-scripted sequences of cluttered sports videos.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 68,
+                            "text": "UCF Sports Action dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider the set of inputs on a full Cartesian product We will employ Gaussian processes (GPs) as nonparametric prior distributions over the latent function that generated the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This can be done by constructing a labeled dataset of queries and answers, tagged by human judges based on how well they believe a query is satisfied by a given answer.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The GTA5 dataset  consists of 24, 966 synthetic images with pixel-level annotations of 19 categories (compatible with the Cityscapes dataset ).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "GTA5 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 140,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The rest of this paper is organized as follows: in Section 2 we describe the speech corpus we are going to work on, in Section 3",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From these figures, we have the similar conclusions with those on Spam Email datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 85,
+                            "text": "Spam Email datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that it is possible that a document selected to be judged might not actually have a judgment in the base collection's qrels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a person attribute dataset (source domain), a direct practice for generating attribute labels on re-id dataset (target domain) is training an attribute recognition model first and then predict labels on the re-id images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Hansard corpus (Canadian parliament proceedings, French version) is a stream of about 34 million words, of which 32 millions (set A) was used for training, 1.1 million (set B) was used for validation, and 1.2 million (set C) was used for out-of-sample tests.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "Hansard corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "k \u2264 k max of candidate cluster numbers, first partition the unmodified dataset D for this range of k values using any method M to obtain the collection {P k } of clusterings for k = k min through k = k max .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Jelinek-Mercer smoothing  uses a single coefficient \u03bb to linearly interpolate the maximum likelihood model with the collection model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, such methods have not been shown to yield significant robustness on complex datasets, or have not been subject to extensive robustness evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We fit models to 3D data sets of up to 14 million data points -timings are given in , where we also see that good compression ratios are attained, in that relatively few basis functions represent the shapes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose a new hybrid approach based on Na\u00efve Bayes inference that uses mixed n-gram models obtained from a training set to classify documents in the corpus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments on Tweets and the Enron email dataset show that the proposed time-aware authorship attribution approach significantly outperforms baselines that neglect the dynamicity of authors.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 53,
+                            "text": "Enron email dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We directly use the DAL model trained on the small training set (0.5M images) and test on the FG-NET dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 108,
+                            "text": "FG-NET dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For qualitative and quantitative evaluations, we perform experiments on simulated data as well as three real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "proposed ISM which is based on SPADE, by maintaining a sequence lattice of an old database.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, after reordering the TREC GOV2 document collection, the distribution of DGaps has almost 60% of 1s, whereas a random ordering yields just 11% of 1s.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 64,
+                            "text": "TREC GOV2 document collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparing to the best learning-based methods (e.g. GI has a noticeable heavy tail in its angular error distribution (e.g. amont the worst 25% cases), which suggests that GI would be more optimal if gray pixels would be i.i.d over the whole datasets (e.g. natural images).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the NIPS corpus we also compare these three methods to collapsed Gibbs sampling (CGS) and the CRF-style oHDP model (oHDP-CRF) proposed by .",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 18,
+                            "text": "NIPS corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation Collections Based on Crowdsourcing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "that is not in the collection, the multinomial topic distribution for the document is estimated using an inference technique.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also classes for training and test set are the same in traditional problem whereas in few-shot learning the two sets are exclusive.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "presents the confusion matrices for the proposed model with and without LSBP, on both the MSRC and UIUC datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 112,
+                            "text": "MSRC and UIUC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To address the statistical challenge arising from large fraction of rare categories in large-scale datasets, another approach, Hierarchically Regularized SVM (HR-SVM), was proposed by the same authors in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike the two, the SVHN dataset is a real-world Digits dataset obtained from house numbers in Google street view images and contains over 600k images (32 \u00d7 32 pixels) in total.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 32,
+                            "text": "SVHN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we also show age distribution of the CASIA-SURF dataset in the right side of  One can see a wide distribution of age ranges from 20 to more than 70 years old, while most of subjects are under 70 years old.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 68,
+                            "text": "CASIA-SURF dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the OSE to be useful in applications, it is crucial that applying it to a vector or a collection of vectors (a matrix) can be done faster than the intended downstream use.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The HTTP dataset and the SMTP dataset are two subsets of the KDD CUP 99 network intrusion data.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 37,
+                            "text": "SMTP dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "HTTP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can see from  that our model performed best when n = 2, and there were statistical differences (McNemar's test; p < .001) between n = 1 and n = 2 on NPB and  MLB data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 152,
+                            "end": 174,
+                            "text": "NPB and  MLB data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on a newly proposed large-scale ReID domain generalization benchmark show that our DIMN significantly outperforms alternative domain generalization or meta-learning methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 90,
+                            "text": "ReID domain generalization benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, scaling Prior Networks to complex datasets with many classes is difficult using the training criteria originally proposed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Network Intrusion dataset (KDD Cup'99) contains TCP connection logs from two weeks of LAN network traffic (424,021 records).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 29,
+                            "text": "Network Intrusion dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since this dataset includes longitude and latitude coordinates for each listening event, it is well-suited for locationaware music recommendation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As , we train the regression (to 5 ground truth landmarks) on the MAFL training set (19k images) and test on the MAFL test set (1k images).",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 83,
+                            "text": "MAFL training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 113,
+                            "end": 126,
+                            "text": "MAFL test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the Walmart.com dataset, other than GRU4Rec and TransRec, we compare with an attention-based RNN model RNN+attn.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 26,
+                            "text": "Walmart.com dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, a general classification method such as support vector machines (SVM) and artificial neural networks can be applied on the transformed data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Learning to Rank dataset  uses them as features for ranking.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 24,
+                            "text": "Rank dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our SPNet on the zero-shot image classification task on three benchmark datasets, i.e. CUB",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Seven of them contain ham emails received by distinct Enron employees from the Enron corpus ; we use the individuals with the largest numbers of messages from a set of mails that have been cleaned from spam.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 91,
+                            "text": "Enron corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "when ranking test sets much smaller that the Reuters-21578 test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 67,
+                            "text": "Reuters-21578 test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First we provide the overall performance of the compared approaches on the three datasets, shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We divided EMOd into training set containing 776 images and test set containing 243 images.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 15,
+                            "text": "EMOd",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given the huge number of features in many real world data sets, such hints as to feature relevance can produce much more sensible clusters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This provides insight into the nature of the relationship between the original data set and perturbed data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe an effective learning scheme, capable of tackling large-scale problems, and validate it on the Recipe1M dataset containing nearly 1 million picture-recipe pairs.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 123,
+                            "text": "Recipe1M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Numerical experiments on UCI, Extended Yale B, CIFAR-10 and CIFAR-100 datasets indicate that our proposed approach is able to learn image representations that exhibit high task performance while mitigating leakage of predefined sensitive information.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 78,
+                            "text": "UCI, Extended Yale B, CIFAR-10 and CIFAR-100 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, data sets in areas such as optimization, computer vision, machine learning or statistics often live in spaces of dimensionality in the order of thousands or millions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The jth record of the ith data set is denoted by X j (i).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "LDA is an unsupervised generative probabilistic model for collections of discrete data such as text documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experiment with pre-training on variants of our synthetic dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let D be a data set of items, and x \u2208 D be an item from this set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The efficiency results for the DBLP and Citation data sets are illustrated in  and (e) respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 58,
+                            "text": "DBLP and Citation data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments are performed on our Turkish Broadcast News (BN) Corpus which also incorporates a spoken IR collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 67,
+                            "text": "Turkish Broadcast News (BN) Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train HAE on the Google-Landmarks dataset  and evaluate in total 16 different types of widely-used features in visual search community .",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 44,
+                            "text": "Google-Landmarks dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also introduce a new benchmark based on Atari 2600 games where we evaluate representations based on how well they capture the ground truth state variables.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is about 520 MB large, and is available from the UCI machine learning repository.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 92,
+                            "text": "UCI machine learning repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Challenge uses search queries received by Bing in March 2010 as the final test set with | | 1500.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Empirically, our approach achieves state-of-the-art results on the H3.6m benchmark dataset with a much enhanced long-term proficiency, capable of predicting natural human-like motions over 50 seconds, and works well on animal datasets such as fish and mouse.",
+                    "annotation_spans": [
+                        {
+                            "start": 67,
+                            "end": 90,
+                            "text": "H3.6m benchmark dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each SVM, we used the last 2000 training examples as a validation set to choose the margin penalty parameter; after choosing this parameter by cross-validation, we then retrained each SVM using all the training examples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results in  indicate that gHMM outperforms Lueck-2011 significantly on recall and F1 on the TREC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 108,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We cluster the transition matrix A from the Markov chain, in order to get a clustering of the web pages in the data collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A common sense knowledge graphs is generated with the help of the statistics of the annotations in the VG dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 113,
+                            "text": "VG dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With a multi-lingual Wikipedia document dataset, we examine whether the proposed method can find the correct matching between documents written in different languages.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted experiments on the Epinion data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 48,
+                            "text": "Epinion data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Take fraud dataset as an example, the relative performance improvement on Macro-P, Macro-F1, and Jaccard by our model is around 3.1%, 0.5%, and 0.5%, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 5,
+                            "end": 18,
+                            "text": "fraud dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We are also given a video collection V = {V 1 , . . . , V M }.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second data set is 22K LabelMe used in  which contains 22,019 images sampled from the large LabelMe data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 34,
+                            "text": "22K LabelMe",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 112,
+                            "text": "LabelMe data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Last but not least, we conduct experiments on both synthetic data and various types of real-world datasets to demonstrate the effectiveness of the proposed framework.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, for desc queries on the GOV2 collection, PQE achieves 24% and 17% improvement in MAP over QL and RM, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 53,
+                            "text": "GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, many prior methods have been evaluated on traffic videos corresponding to relatively sparse scenarios with only a few heterogeneous interactions, such as the NGSIM  and KITTI  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 171,
+                            "end": 197,
+                            "text": "NGSIM  and KITTI  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This suggests one unified approach that explicitly performs inference optimization at each step, replacing the current collection of custom filtering methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In other words, a term t i can be viewed as a document vector (x i,1 , ..., x i,N ), where N is the number of documents in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ED corpus represents a realistic sample from a stream of tweets, whereas STS was intentionally manipulated to over-represent subjective tweets.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 13,
+                            "text": "ED corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results on the standard NGSIM dataset and an additional dataset containing heterogeneous traffic of moderate density.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 47,
+                            "text": "NGSIM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although DP is a collection of general methods for solving Markov decision processes (MDPs), these algorithms are computationally infeasible for problems with very large state sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to get a better idea of how locality based functions are affected by the uncertainty, we will examine the behavior of the synthetic data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Ultimately, we believe it will be necessary to build such a collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both WorldExpo'10 dataset and UCSD dataset provide example with sparse crowd scenes in the form of ROI regions.",
+                    "annotation_spans": [
+                        {
+                            "start": 5,
+                            "end": 25,
+                            "text": "WorldExpo'10 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 30,
+                            "end": 42,
+                            "text": "UCSD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "CTPF outperforms the Decoupled PF model and the Content-only model on all data sets except on cold-start predictions on the arXiv data set, where it performs equally well.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 138,
+                            "text": "arXiv data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let I = {a 1 , ..., a M } be a finite set of items, and D a dataset containing N transactions, where each transaction t \u2208 D is a list of distinct items t = {i 1 , ..., i T }, i j \u2208 I.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the history collection, we clean the data by only keeping those frequent, well-formatted, English queries (queries which only contain characters 'a', 'b', ..., 'z', and space, and appear more than 5 times).",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 25,
+                            "text": "history collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also evaluate our approach on the MS COCO dataset , which is among the most challenging datasets for instance segmentation and object detection.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 52,
+                            "text": "MS COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results are shown after 1000 line optimizations (D = 900), and performance on the test set was checked after every line optimization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MSD dataset was used in the Kaggle Million Song Dataset Challenge  and consists of listening histories for 1.2M users and 380K songs.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "MSD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is modeled formally by a Hidden Markov Model : where the probability of selecting a term t given the query language model \u03b8query is a mixture between selecting from the topic language model \u03b8topic and the background collection model p(t), which is controlled by \u03bb.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The result is shown in the last case for each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each problem, we apply two different datasets from the LIBSVM : the a9a dataset (n = 32561, d = 123) and the w8a dataset (n = 49749, d = 300).",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 83,
+                            "text": "a9a dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 113,
+                            "end": 124,
+                            "text": "w8a dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, for the Wikipedia dataset, we set the number of class-specific topics per class and the number of shared topics in GRLSI and GNMF as (K s , K c ) =",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 39,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection is annotated towards three different entities: Michelle Obama, Angela Merkel, and Angelina Jolie.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All methods are tested under the same blurry condition, where methods  use GoPro dataset  to train their models.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 88,
+                            "text": "GoPro dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Examples of event datasets include logs, transactions, notifications and alarms, insurance claims, medical events, political events, and financial events.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Looking at the split between positive and negative examples in the test data, one would achieve 61.8% relevance precision on HEAD and 59.7% relevance precision on TAIL by impressing every available social annotation to a user.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our Bayesian learning approach by conducting extensive experiments on the TREC 2004-2007 Genomics data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 119,
+                            "text": "TREC 2004-2007 Genomics data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present raw images, range maps, and the corresponding S1-S5 results on D1-5 in , and on the stereo database of  in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An important driving factor for their fast development and great success is the existence of a large amount of benchmark datasets for systematic evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The DBR dataset describes connections between threads (the set X) and users (the set Y ) of the Duke Basketball Report message board from 2.18.07 to 2.21.07.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "DBR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , we create training data by partitioning the document collection DN into sub-collections (or document snapshots) with respect to time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second one is exchanging the training dataset and the testing dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "presents word recognition error and number of system parameters for four different versions of the system, for three different Resource Management test sets using the word-pair grammar.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results for the DBLP and Citation data sets are illustrated in  and (f) respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 47,
+                            "text": "DBLP and Citation data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "on Dataset-C is 0.464 (SD = 0.245), which is significantly lower than M(\u03b2 or \u0434anic ) = 0.654",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We achieve a new state of the art for multi-object segmentation without fine-tuning on the DAVIS 2017 validation dataset with a J &F mean score of 71.5%.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 120,
+                            "text": "DAVIS 2017 validation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As used in , this dataset is divided into two equal subsets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments are conducted upon the TREC ClueWeb09 corpus and the query stream from the MSN 2006 query log.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 56,
+                            "text": "TREC ClueWeb09 corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 87,
+                            "end": 105,
+                            "text": "MSN 2006 query log",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent statistically motivated studies have suggested that topic sets used in IR test collections should be substantially larger than they currently are in order to meet a clear set of statistical requirements.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 97,
+                            "text": "IR test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MVTec Anomaly Detection dataset comprises 15 categories with 3629 images for training and validation and 1725 images for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 35,
+                            "text": "MVTec Anomaly Detection dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a dataset satisfying \u03b3-margin Separation, there exists an algorithm which can find the target partitioning for any hypothesis class in O(( can be more than \u03b3 apart.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We resolve all redirects using the local Wikipedia database.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 59,
+                            "text": "Wikipedia database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental results on Reuters and OHSUMED datasets show that with the help of Wikipedia thesaurus, the clustering performance of our method is improved as compared to previous methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 56,
+                            "text": "Reuters and OHSUMED datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Multiple augmentations are considered for each image in the AVA data set, and their corresponding features are stored.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 72,
+                            "text": "AVA data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also tested our method on the 2005 Pascal Challenge dataset, http://www.pascalnetwork.org/challenges/VOC/voc2005.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 62,
+                            "text": "2005 Pascal Challenge dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, given a set of word tokens and documents in a collection, all the tokens are put in a single cluster while each document in the collection is assigned to each singleton cluster.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The target collection is obtained from  and consists of 19, 955 product descriptions from Amazon.com grouped into 1, 171 webshops.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A group convolution is simply a collection of G disjoint convolutions (G is the number of groups).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "it relies on the high dimensional geometry of the classifiers -successfully computes sparse enough perturbations for all three datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that for the case of real data sets, we are using the class label as a proxy for the effectiveness of the distance function.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first one is the WT10g web collection and consists of 1,692,096 web pages, as well as the associated TREC topics (451-550) and judgments.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 41,
+                            "text": "WT10g web collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 116,
+                            "text": "TREC topics",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All the accuracies on ImageNet-1K dataset are tested on the validation dataset using the single view center crop.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 41,
+                            "text": "ImageNet-1K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The empirical results on the webspam datasets are encouraging because they verify that our proposed one permutation hashing scheme performs as well as (or even slightly better than) the original kpermutation scheme, at merely 1/k of the original preprocessing cost.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 45,
+                            "text": "webspam datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the first dataset, the only dependencies in pairs (2, 3) and  are correctly detected by all methods, and all other pairs were announced independent.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Hotel data set, it reaches the maximum value at 70 and 100 for USTM-FT(S) and USTM-FT(W) respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 22,
+                            "text": "Hotel data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the resolution reductions specified above were necessary to even run those baseline models on the non-MNIST datasets, even with state-of-the-art GPUs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is observable that SEQ2TREE's performance is better than our basic model's on Java dataset, which demonstrates the ability of SEQ2TREE to leverage code hierarchies.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 93,
+                            "text": "Java dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While retweet is recognized as the key mechanism for information diffusion on Twitter, a rich set of studies has been conducted to predict retweets , mainly based on classification frameworks which incorporate different features related to tweets or authors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To compare with state-of-the-art methods, the classifier based on SVM was also tested on the datasets stated above.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Error rates for deformable matching on different datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They also aim at discovering the latent domains from datasets, by modeling the data with a hierarchical distribution consisting of Gaussian mixtures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Quantitative evaluation is conducted on the two data sets with label information: the Title data and the Question data, with the number of clusters ranging from 20 to 100.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 96,
+                            "text": "Title data",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 118,
+                            "text": "Question data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The so-called hyper-parameter \u03b1 acts as pseudocounts, and can be used to integrate collection statistics for smoothing the estimation .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The \"questioner\" user is shown the captions of the image (from COCO dataset ) while the image itself remains hidden.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 75,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We achieve a new state-of-the-art on this task for NYU Depth and KITTI datasets, having effectively leveraged the full raw data distributions.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 79,
+                            "text": "NYU Depth and KITTI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, the global knowledge required regards the whole dataset, and not just the single partition currently considered.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The standard 20 Newsgroups (20NG) dataset is used that contains news articles about various topics such as sports, electronics, science, and more.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 41,
+                            "text": "20 Newsgroups (20NG) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the Vistas dataset contains many more annotated classes than the Cityscapes dataset, we only consider the subset of classes which are in both datasets and treat the rest as unlabeled during training.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 21,
+                            "text": "Vistas dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 68,
+                            "end": 86,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These datasets are fully calibrated and contain images captured by a forward-looking camera mounted on a vehicle driving through a city.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The recall with increasing value of k of the DBLP data set are illustrated in .",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 58,
+                            "text": "DBLP data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The WT2G collection is a general Web crawl of Web documents, which has 2 Gigabytes of uncompressed data.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "WT2G collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the ratio of the corrupted training data goes to 80%, our model still has 67.19% mean accuracy, outperforming the  : Accuracy comparison of DKS, KD and its variants on the ImageNet classification dataset using ResNet-18.",
+                    "annotation_spans": [
+                        {
+                            "start": 175,
+                            "end": 206,
+                            "text": "ImageNet classification dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Musk datasets contains feature vectors describing the surfaces of low-energy shapes from molecules.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "Musk datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since we use datasets with a known ground truth, we choose to employ a methodology that takes full advantage of external information.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly, if the auxiliary data set and the target data set do not share common classes, there will be no knowledge transformation in the optimization problem of Eq.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the CelebA dataset, we use the outputs of pool5 layer from VGG-Face  and for the text dataset, we use the outputs of pool5 layer from a VGG-19 network.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 22,
+                            "text": "CelebA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the previous work on XMTC, we focus on top predictions by varying k at 1, 3 and 5 in P @k, resulting in 18 (= three k \u00d7 six datasets) values of P @k for each method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, in many applications, we are likely to perform a range query only over 3 to 4 dimensions of a 100-dimensional data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our work on two patent retrieval corpora, CLEF-IP 2010 and CLEF-IP 2011, using baselines which employ external resources for query expansion.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 66,
+                            "text": "CLEF-IP 2010",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 71,
+                            "end": 83,
+                            "text": "CLEF-IP 2011",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides on LFW and YTF datasets, we also report the performance of ArcFace on the recently introduced datasets (e.g. CPLFW  and CALFW ) which show higher pose and age variations with same identities from LFW.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 31,
+                            "text": "LFW and YTF datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 117,
+                            "end": 122,
+                            "text": "CPLFW",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 128,
+                            "end": 133,
+                            "text": "CALFW",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "NYC taxi dataset: The data is from The New York City Taxi and Limousine Commission 1 , which records fields capturing pick-up time, location and payment information of green taxis' orders.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 16,
+                            "text": "NYC taxi dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Robust track, which used the AQUAINT newswire document collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 65,
+                            "text": "AQUAINT newswire document collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to make the TAC 2014 EDL collection usable for our task, we obtained mention annotations for query-candidate mention pairs using Amazon Mechanical Turk.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 44,
+                            "text": "TAC 2014 EDL collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the following ablation study, we analyze (a) the impact of bias correction, (b) the split of validation set, and (c) the sensitivity of exemplar selection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Caltech-256  and the Caltech-UCSD birds dataset ; scene recognition on the SUN-397 database ; detection on the PASCAL VOC dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 91,
+                            "text": "SUN-397 database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 111,
+                            "end": 129,
+                            "text": "PASCAL VOC dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 47,
+                            "text": "Caltech-256  and the Caltech-UCSD birds dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, for our D2011 dataset, compared to a high average tweet implications value of 204.6 for credible events, the average tweet implications for non-credible events is merely 29.4.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It could be the case that a set of attributes defines a large query group, and even though the percentage of dissatisfaction cases resulting from this group is very small, it could still be overrepresented in the dissatisfaction dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 213,
+                            "end": 236,
+                            "text": "dissatisfaction dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "I n , by robustly estimating fundamental matrices between image pairs from collections of point matches, e.g., using RANSAC.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test the model on the MNIST dataset, which consists of 60K hand written digits of size 28 \u00d7 28 for training, and 10K for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 38,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first step relies on two scans of the dataset, during which the horizontal dataset is pruned, and decisions are taken concerning the number and size of partitions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular,  looked at how to support temporal range queries over versioned collections, where queries are restricted to a certain interval of time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On three few-shot image retrieval datasets, FastAP consistently outperforms competing methods, which often involve complex optimization heuristics or costly model ensembles.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The latter vocabulary was estimated from 5MB of English text, C code, Bib'IEXsource, and newsgroup data from outside the Calgary corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 135,
+                            "text": "Calgary corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As verified by our experimental results, on real data sets, we often only need to find a small number of rules, ranging from 20 on small data sets to 100 on large data sets, to achieve an accurate classifier.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test collections are built mostly using the pooling method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Pascal VOC 2007 is a perspective image dataset with object annotations.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 15,
+                            "text": "Pascal VOC 2007",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider a data set X of elements, each of which is a d-dimensional vector over a set F of features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first one is to divide the dataset in disjoint partitions that can fit in main memory one at the time, and the second one is that every frequent itemset must be frequent in at least one of these partitions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We set the ratio to be 0.1 and K = 100 for our collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence we collect the large-scale IP102 dataset, which contains 102 categories of insect pests with 75, 222 samples.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 46,
+                            "text": "IP102 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Though it is generally believed that relations in MNAR datasets are not recoverable, the following example demonstrates otherwise.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 63,
+                            "text": "MNAR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The performance trend for the other datasets are similar to those reported in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The format of this collection consists of 82 TREC queries with a total of 8,027 answer passages in total.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From both the real and simulated datasets, we randomly chose 1,000 APS or ground truth images with corresponding event stacks, not used in the training step, for testing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, in the next section we compare a replay-based algorithm to a model-based algorithm which was stable enough to achieve impressive sample-efficiency on the Atari benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 177,
+                            "text": "Atari benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We checked the Wikipedia dataset and found that this is because more than one third of Wikipedia articles fall into category \"geography\", and some geography related topics appear to be general in the document collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 32,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Considering all datasets and scenarios, Sum(c, o, 3) occurs in 95% of the generated functions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly, we compose these foregrounds with randomly picked unique background images from MS-COCO  to form the final dataset, totally 28610 images for training.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 98,
+                            "text": "MS-COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The 2011 TRECVID MED dataset  consists of a collection of Internet videos collected by the Linguistic Data Consortium from various Internet video hosting sites.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 28,
+                            "text": "2011 TRECVID MED dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Role Player Analysis (performed once) and Popularity Computation.   shows the ratings provided by a user U on six movies extracted from the Netflix dataset and the rankings of two main actors/actresses in each movie at various rating levels obtained from STARmeter on imdb.com 5 .",
+                    "annotation_spans": [
+                        {
+                            "start": 140,
+                            "end": 155,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report a novel annotation effort performed with the ClueWeb09 benchmark where pages were labeled as being spam, sham, or legitimate content.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 74,
+                            "text": "ClueWeb09 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We sample triplet consists of a front, left, and right view image from 300W-LP dataset using the provided yaw angles, which results in 140k training triplets in total.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 86,
+                            "text": "300W-LP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the SMVguide3 binary classification dataset which contains n = 1260 points in d = 23 dimensions.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 50,
+                            "text": "SMVguide3 binary classification dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments with two IR collections show that adjusting the vicinity of term co-occurrence when computing TextRank term weights can lead to gains in early precision.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 35,
+                            "text": "IR collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using these keyword queries, we generated a time-travel query workload with 5 instances each for the following 4 different temporal predicate granularities: day, month, year and queries spanning the full lifetime of the respective document collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All hyper-parameters are selected using a validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data set they use is a selection of 84 saraband dances, by 15 different seventeen-century French composers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Their experiments on different TREC collections using popular IR metrics like MAP indicate that query segmentation can indeed boost IR performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 47,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These prior studies lead us to question to what extent the TREC-TS test collections are in fact robust when evaluating unpooled systems.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 83,
+                            "text": "TREC-TS test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Third, from the machine learning viewpoint, captioning models are typically trained on datasets where each image has at least 5 ground-truth captions (e.g., MSCOCO), and thus captioning models should also be evaluated on how well the learned conditional distribution of captions given an image approximates that of the ground-truth.",
+                    "annotation_spans": [
+                        {
+                            "start": 157,
+                            "end": 163,
+                            "text": "MSCOCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In experiments, we demonstrate the state-of-the-art visual recognition performance on MIR-Flickr database and PASCAL VOC 2007",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 105,
+                            "text": "MIR-Flickr database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 110,
+                            "end": 125,
+                            "text": "PASCAL VOC 2007",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ERACD v.s. Clusteringbased method on IMDB dataset lies whereas the clustering-based one returns collections of entities.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 49,
+                            "text": "IMDB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the TAC 2014 EDL collection has quite sparse mention annotations, we used Amazon Mechanical Turk to obtain additional relevance judgments for a set of pooled candidates from five systems.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 33,
+                            "text": "TAC 2014 EDL collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given an input image I and a collection of N detected objects in that image",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We carried out three sets of experiments on a number of different data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, since our suggested approach can be applied to any data set consisting of unstructured texts and specifications (key-value) data of entities, it can be employed in other problems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection was used to show that a well established experimental IR system does not deal effectively with ambiguous queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "KTN and SPHCONV perform significantly better than the other baselines on the high resolution datasets, i.e., Pano2Vid and Pascal VOC.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 117,
+                            "text": "Pano2Vid",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 132,
+                            "text": "Pascal VOC",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides, BP4D database is the largest face images database with AU labels, and many meticulously-designed supervised AU recognition systems have been evaluated on this database.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 22,
+                            "text": "BP4D database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a showcase, we used the TREC2004 Robust test collection (528,155 documents and 250 topics).",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 58,
+                            "text": "TREC2004 Robust test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Future work will consider more available email collections and more features that could be signals for user reply behavior prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As performances on trimmed video datasets advance , recent focus of video understanding has shifted towards longer and untrimmed video data, such as VLOG , Charades , and EPIC-Kitchens .",
+                    "annotation_spans": [
+                        {
+                            "start": 149,
+                            "end": 153,
+                            "text": "VLOG",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 156,
+                            "end": 164,
+                            "text": "Charades",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 171,
+                            "end": 184,
+                            "text": "EPIC-Kitchens",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For learning the bilingual embeddings, we used sections of the Europarl corpus  which contains roughly 2 million parallel sentences.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 78,
+                            "text": "Europarl corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Parametric censored regression methods assume that the survival times of all instances in a dataset follow a particular distribution, and that there exists a linear relationship between either the survival time or the logarithm of the survival time and the features .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Efforts have been made to extract useful information from specialized collections such as biomedical literature  to create databases.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compared our method with existing state-of-the-art approaches on multiple datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The syllable \"tap\", samples 14000 through 17000 of the \"trainldr5/fcdfll sxl06/sx106.adc\" utterance on the TIMIT Speech Database, is used in all voiced examples in this paper.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 128,
+                            "text": "TIMIT Speech Database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Dependent Dirichlet processes  are distributions over collections of distributions, each indexed by a location in some covariate space (e.g. time), such that distributions that are close together in that space tend to be similar.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As follows from , the user attribute prediction accuracy reaches the maximum value when the number of topics is set to 20 for the Auto data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 130,
+                            "end": 143,
+                            "text": "Auto data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consequently, they lack the capability of coping with complicated cluster structures, thus often giving rise to noisy clusters, especially when applied to large-scale datasets collected from real-world settings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Inspired by previous work , we show that video prediction can be approached as a stochastic process: we gather a random collection of high quality proposals in one shot, with a multiple choice adversarial learning scheme that encourages diversity within the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the best value of h for the NPB and MLB data sets was \u221e (corresponds to MNB) as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 59,
+                            "text": "NPB and MLB data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the experiments we use the gov2.1000 and gov2.30 splits of the TREC GOV2 dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 83,
+                            "text": "TREC GOV2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ABCD dataset  is a dataset for detecting changes in buildings from a pair of aerial images taken before and after a tsunami disaster.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "ABCD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The goal of this paper is to present probabilistic algorithms that scale linearly with the number of observations and perform well on very sparse and imbalanced datasets, such as the Netflix dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 183,
+                            "end": 198,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, the difference between the original and randomized results provides the basis for a quantitative assessment of the significance of the clustering results obtained, and these significance results can then be used together with cluster quality results to decide how many clusters are present in the dataset when there is evidence in support of a cluster structure.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Voorhees  conducted the first large scale study of a word sense disambiguation system applied to the topics and documents of 5 test collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From , it can be concluded that P-pooling elevates the performance for many categories on all the VOC, Cityscapes and ADE20K datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 133,
+                            "text": "VOC, Cityscapes and ADE20K datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we examine the new scenario where the test set is also composed of heterogeneous domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "estimate that the OCR word error rate of scanned books in the IA database is less than 15% .",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 73,
+                            "text": "IA database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, we randomly split queries in train and test (half of the queries are used for training, the other half for testing).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Third, we create a new large-scale synthetic dataset, Ob-Man, with hand-object manipulations.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 60,
+                            "text": "Ob-Man",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Table 1(a) compares the accuracy achieved by DC to the the last (15th) round of IDC with respect to all data sets described in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the proposed approaches on NUS-WIDE dataset  with 269,648 images.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 55,
+                            "text": "NUS-WIDE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the 4 explicit feedback datasets detailed in the previous section.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In almost all cases our method was also faster, improving runtime in 22 out of 25 data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we show that this strategy shows great promise on real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two data collections used in our experiments are: (i) the English Wikipedia revision history, referred to as the WIKI collection, which contains the edit history from January 2001 until December 2005 excluding all the minor edits, and (ii) a web archive, referred to as the UKGOV collection, provided by the Internet Memory Foundation (previously European Archive), consisting of weekly crawls of 11 government websites within U.K. during 2004 and 2005.",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 132,
+                            "text": "WIKI collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 278,
+                            "end": 294,
+                            "text": "UKGOV collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used a collection of more than 13k online videos from \"MSN Soapbox\" .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We include eight defense models which are shown to be robust against blackbox attacks on the ImageNet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 109,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While a powerful approach, the design of new discriminator layers and adversarial training are both challenging in practice, especially without a labeled validation set on the target domain (as is the case in an unsupervised setting).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Nevertheless, using monotonicity inference almost always pays off, the Ohsumed data set being the one exception.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 87,
+                            "text": "Ohsumed data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "also shows that for most of the data sets and similarity measures the test results for the LDoc;T ype and",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a complete or partial collection of measured fundamental matrices, our method seeks camera matrices that minimize a global algebraic error for the measured fundamental matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, we replace all polytopes with convex hulls formed by intersecting the polytopes with the dataset predictors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We followed the same task design and setting as the ones used to create the test collections for the AOR task at SemSearch (as described in ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the former comes with tens of millions of <user, song, playcount> triples and the latter with hundreds of millions of <user, song, rating> instances, none of these collections include listener location.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "beled dataset, which contains 2000 unlabeled street scene images collected from the Cityscapes dataset, to show that the distillation schemes can transfer the knowledge of the unlabeled images.",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 102,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Incorporating \u03a3 s into the Gaussian prior leads to a new regularization term and the resulting model is: Extending the discussion on SVMs in , all regularized linear models in the form of eq.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "DBLP dataset : The DBLP graph contains the coauthorship network in the computer science community.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 12,
+                            "text": "DBLP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the authors proposed the use of VIPS  to guide the segmentation process, the clustering of segments into segment classes requires a manual process even when using VIPS, making the application of the method unfeasible in large Web page collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "EMCUA has the overall three-pixel-error of 2.09%/1.64% on KITTI2015/2012 dataset, and achieves 9.9%/13.2% decrease compared to PSM-Net, while MCUA has that of 2.14%/1.70%, and achieves 7.8%/10.1% decrease compared to PSM-Net.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 80,
+                            "text": "KITTI2015/2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When X Y are sparse and C xx , C yy are diagonal (like the Penn Tree Bank dataset in the experiments), Algorithm 1 can be implemented extremely fast since we only need to multiply with sparse matrices or inverting huge but diagonal matrices in every iteration.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 81,
+                            "text": "Penn Tree Bank dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We do five-fold cross validation on this dataset, i.e., we use 80% of the mobile apps to train our model and other methods, and the remaining ones are used for testing the effectiveness of prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These differences become especially important for the Arabic collection, where P r SCFG/",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "datasets, where our method produces results comparable or even outperforms the stateof-the-art MVSNet .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We do so on MNIST 12k, rotated MNIST SO(2) and reflected and rotated MNIST O(2) to investigate the influence of the presence or absence of certain symmetries in the dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 21,
+                            "text": "MNIST 12k",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 23,
+                            "end": 42,
+                            "text": "rotated MNIST SO(2)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 47,
+                            "end": 79,
+                            "text": "reflected and rotated MNIST O(2)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results obtained for all the methods on five datasets across four evaluation metrics are presented in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We took the TREC GOV2 collection, containing 426 GB of web pages crawled from the .gov domain, and applied the Boilerpipe software package 1 to generate a plain text version occupying90 GB.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 32,
+                            "text": "TREC GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Polyvore dataset  is a crowd-sourced dataset created by the users of a website of the same name; the website allowed its members to upload photos of fashion items, and collect them into outfits.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "Polyvore dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Europarl corpus contains roughly 2 million parallel sentence pairs between English and German as well as English and French, for which we induce 40 dimensional word representations.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "Europarl corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Assuming m submodels, an aggregation model divides the complete dataset D into m disjoint subsets, Here, K ii represents the covariance matrix of X i , I i the order-n i identity matrix, and n i the number of instances in D i , respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In section 4, we present experimental results obtained with our approach on the TREC-9 Ohsumed and the Reuters-21578 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 125,
+                            "text": "TREC-9 Ohsumed and the Reuters-21578 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We make a comparison to SGPN on our FineSeg dataset, using again AP with IoU thresholds of 0.25 and 0.5.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 51,
+                            "text": "FineSeg dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, it is challenging but valuable to design a multi-style captioning model by exploring such unpaired multi-stylized data in addition to handily available factual image-caption paired data (e.g. MS COCO  dataset), which motivates our work.",
+                    "annotation_spans": [
+                        {
+                            "start": 203,
+                            "end": 219,
+                            "text": "MS COCO  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", C P be the collections of closed itemsets mined from the P partitions of the original dataset D, respectively D [p0,p1) , D [p1,p2) , . .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the collections used: TREC Aquaint, and two collection of patents from the US Patent Office (USPTO) and the European Patent Office (EPO), respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 40,
+                            "text": "TREC Aquaint",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(e.g., the Base Indi+Cont+Loca has an improvement of 16.6% mAP over the Base Indi , which obtains 72.6% mAP on the MS Dataset).",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 125,
+                            "text": "MS Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the results on both data sets reveal an interesting trend: the multilayer arc cosin often perform better than their single layer counterparts.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since we assume that the whole dataset cannot be mined in the main memory available, we exploit a divideet-impera approach through the following steps: 1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This issue is particularly crucial for a given new dataset without training data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By deploying MADNet together with MAD we introduce the first real-time self-adaptive deep stereo system enabling competitive performance on heterogeneous datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on a large-scale dataset  for conversation question answering, which consists of 200K dialogs with 1.6M turns over 12.8M entities from Wikidata.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SIS performed best on Wine dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 34,
+                            "text": "Wine dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is demo addresses the needs of four classes of users: test collection builders, researchers, lecturers, and students.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "J. We define the supervision on the vertex positions (L V Hand ) and joint positions (L J ) to enable training on datasets where a ground truth hand surface is not available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first analyzed dataset consists of the E. coli promoter gene sequences (DNA) with associated imperfect domain theory .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": we obtain 1000 mobile apps from original datasets, whose rating scores are the largest top 1000 with over 1000 number of ratings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the Enron dataset we selected mails from 15 prolific authors written between 1998-01 and 2002-09.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 22,
+                            "text": "Enron dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate that our approach is comparable to prior methods on sparse datasets such as the NGSIM dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 108,
+                            "text": "NGSIM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present several samples predicting up to 20 time stamps for all three datasets shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare our approach on 80 different classification data sets from the UCI repository to the current state of the art approaches Auto-WEKA and auto-sklearn and show that Automatic Frankensteining is outperforming its competitors on the large majority of data sets using the same CPU time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and  compare three state-of-the-art retrieval systems on 100 adhoc queries in the ClueWeb12B UQV100 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 110,
+                            "text": "ClueWeb12B UQV100 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We introduce the DensePose-Track dataset, based on the publicly available version of the PoseTrack dataset , which contains 10 339 images and 76 058 annotations.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 40,
+                            "text": "DensePose-Track dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 106,
+                            "text": "PoseTrack dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "adversary with access to publicly available auxiliary information can still recover the information about individual, as was the case with the de-anonymization of the Netflix dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 167,
+                            "end": 182,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Runtime and number of states of the online approach for the Hepatitis data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 79,
+                            "text": "Hepatitis data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Simple Margin method works surprisingly bad in this data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The three networks, S, T , and D, are trained according to the following objective function: For experiments on Pascal VOC and GMU datasets, we use the VGG-16  network up to Conv-5.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 139,
+                            "text": "Pascal VOC and GMU datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a function, PTR determines an upper bound on its local sensitivity at the input dataset through a search procedure; noise proportional to this upper bound is then added to the actual function value.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The absolute precision-recall tradeoff was also quite strong, especially for the Citation data set, in which a precision of 0.70 was achieved at a recall point of 0.84.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 98,
+                            "text": "Citation data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given that with 2 labeled documents the performance of the classifier is 0.179, 0.154, 0.053 and 0.078 respectively for each of the four corpora, we observe that we are able to recommend enough useful features to improve over the initial classifier by 10% (absolute difference between row 8 and row 5 of the performance of the 20NG, TDT3 and RCV1 corpora).",
+                    "annotation_spans": [
+                        {
+                            "start": 327,
+                            "end": 354,
+                            "text": "20NG, TDT3 and RCV1 corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on the challenging COCO benchmark show that when using mask score from our MS R-CNN rather than only classification confidence, the AP improves consistently by about 1.5% with various backbone networks.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 54,
+                            "text": "COCO benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our dataset, described in more detail in the Supplement, the two non-online method failed to converge and required significantly larger memory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All synthetic datasets are generated using the func-tion \"simple.surv.sim\" in survsim package",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this data set, no results for CFMN were available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This work has the following main contributions, (1) we propose the first fashion compatibility method that uses context information;  we perform an empirical study of how the amount of neighbourhood information used during test time influences the prediction accuracy; and (3) we show that our method outperforms other baseline approaches that do not use the context around each item on the Polvvore , Fashion-Gen , and Amazon  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 391,
+                            "end": 436,
+                            "text": "Polvvore , Fashion-Gen , and Amazon  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the evaluation in the OE dataset, 8230 samples are randomly selected for training and the remaining 1953 ones are applied in testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 36,
+                            "text": "OE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The improvement of our method over the baseline on the TREC data is not as high as those on DataSetA.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 64,
+                            "text": "TREC data",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 100,
+                            "text": "DataSetA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the FR-12 collection, since Liu and Croft '02  used only queries 51-100 in their experiments, we added queries 101-150 for training, while leaving out queries 51-100 for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "FR-12 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Kinetics is a large data set for human action analysis, containing over 240, 000 video clips .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 8,
+                            "text": "Kinetics",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 5 we provide an empirical comparison of the two bounds on several synthetic and UCI datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 103,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on Physiology ECG Data Set.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 34,
+                            "text": "Physiology ECG Data Set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The numerical features were extracted from three TREC collections, and from three query logs.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 65,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It also fails to outperform the simple DenseVLAD baseline on the Street scene, which is the largest and most complex scene in the Cambridge Landmarks dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 130,
+                            "end": 157,
+                            "text": "Cambridge Landmarks dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the simulation of SGLD-SA in logistic regression and the evaluation of SGLD-SA on UCI datasets, we leave the results in Appendix C and D.  Classification with Auto-tuning Hyperparameters.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 98,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Often, the tools for automating machine learning are extended by * Information Systems and Machine Learning Lab, University of Hildesheim, Germany methods from meta-learning to transfer knowledge from observed data sets to new ones to initialize the search .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 2 briefly discusses the related work, Section 3 describes the dataset we used and explains why personalization is important.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the following, we conduct all experiments on the Breakfast dataset and report MoF accuracy unless stated otherwise.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 69,
+                            "text": "Breakfast dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At the surface, we are not given any information about the domains that the datasets contain, such as the statistical properties of the domains, or even the number of domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the held-out likelihood for LDA, hLDA, and thLDA on Twitter, AP, and RCV1 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 80,
+                            "text": "Twitter, AP, and RCV1 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is possible to consider if an entity appears as a subject of a sentence as this is generally the person or thing carrying out an action (after running a dependency parsing over the sentence collection).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Eventually, the dataset is grouped into 6 styles (see ) and Tab.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To meet the first condition, our kernel needs to be a collection of triangles as our SpherePHD has triangular pixels. .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Sample RCV1 data set has D = 55,606 documents with a vocabulary size V = 8,625 unique terms.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "RCV1 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This results in a clean dataset of 5554 observations from the original 6360 observations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We found that the bias can be effectively corrected with a small validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the four UCI datasets, we randomly select 70% of the data for training and the rest for test, and repeat this process for 20 times.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results for the DBLP and Citation data sets are illustrated in  and (f) respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 47,
+                            "text": "DBLP and Citation data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Now we describe our positional index structure for versioned document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Further, we show in our results on Taskonomy dataset that task ranking similarity is independent of model size.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 52,
+                            "text": "Taskonomy dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The standard task on this dataset is to recognize promoters in strings of nucleotides (A, G, T, or C).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is because for sparser datasets, PMF would have relatively more difficulty in learning users' preferences from fewer number of past ratings, while KPMF could still take advantage of the known social relations among users and utilize the observed ratings better.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "WIKI is an explicitly versioned document collection, for which all its versions are known.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to MORPH Album 2 and CACD-VS, the FG-NET dataset is much more challenging containing a wide covering of ages from 0 to 69.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 57,
+                            "text": "FG-NET dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 12,
+                            "end": 25,
+                            "text": "MORPH Album 2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 30,
+                            "end": 37,
+                            "text": "CACD-VS",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Additionally, we hold out a validation set to search for an optimal dimensionality",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although efficient, these methods usually require the input of GP to conform a special distribution (e.g., grid structure), making them inapplicable for most real-world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, there have been attempts to build collections for sentiment analysis  or event detection .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Does small LLV in training set automatically generalize to test set?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results of the FCI algorithms on the dataset with unknown confounders are slightly worse than that without unknown confounders because the FCI algorithms consider the unknown confounders and output Partial Ancestral Graph (PAG)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Related, Kontonasios and De Bie  propose a two-phase approach to select the most informative noisy tiles from a collection of faulttolerant itemsets, using MDL and a maximum entropy data model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have extracted these features from a collection of 20 million images (see Section 2.3 for details).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The effectiveness of the proposed framework is validated on three different image retrieval tasks based on continuous similarities: human pose retrieval on the MPII human pose dataset , room layout retrieval on the LSUN dataset , and caption-aware image retrieval on the MS-COCO dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 215,
+                            "end": 227,
+                            "text": "LSUN dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 271,
+                            "end": 286,
+                            "text": "MS-COCO dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 160,
+                            "end": 183,
+                            "text": "MPII human pose dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also conduct experiments on CIFAR100 dataset, with 100 classes, 50K training and 10K test images .",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 47,
+                            "text": "CIFAR100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "illustrates the materialization of the star neighbor relationships of a spatial dataset at a time slot.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Effective querying over these collections requires the use of so-called time-travel queries that combine temporal constraints with standard keyword queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To verify the effectiveness of the proposed AS-GCN, we conduct extensive experiments on two distinct large-scale data sets: NTU-RGB+D  and Kinetics .",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 133,
+                            "text": "NTU-RGB+D",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 139,
+                            "end": 147,
+                            "text": "Kinetics",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present the equations for w i and z i below, please refer to the Appendix for the derivations: The soft evidence corresponding to messages from the neighboring nodes is obtained by running BP on the entire training dataset (labeled and unlabeled).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The practical value of this idea lies in the fact that we can exclude analysis options that are unlikely to characterize whatever structure is present in the dataset at twice the standard clustering cost, rather than m + 1 times this cost for some large m.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, it should be remembered that the collections of both engines are large and heterogeneous, which increases the likelihood of finding matches to more than one interpretation of an ambiguous query.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, their computa-tional burden is not tractable for large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For Flickr datasets we use 1,000 images for validation, 1,000 for testing and the rest for training (consistent with ).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "Flickr datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For , we obtain the minimum and maximum values of pairwise distances for each dataset and employ 1000 different values of equally distributed within this range.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the AOL dataset, using both short-and long-history features increases the gains to +6.45.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 18,
+                            "text": "AOL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation and Design Validation on FlyingTh-ings3D. As annotating or acquiring dense scene flow is very expensive on real data, there does not exist any large-scale real  dataset with scene flow annotations to the best of our knowledge  .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We trained SimplE-ignr and SimplE (with tied parameters according to the rules) on this new training dataset with the best hyper-parameters found in the previous experiment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and  illustrate the comparison of each method's clustering performance on the two data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the classical CORI collection selection algorithm  is used as a baseline.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 42,
+                            "text": "CORI collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "claimed that the PASCAL dataset  is not suitable to test context-based object recognition algorithms as most of its images contain only a single object class.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 31,
+                            "text": "PASCAL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Overall our results from training with soft pseudo-labels are better than  on this dataset by \u223c5 in terms of AP.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is clearly seen by comparing the performance of ML-LDR and ML-LDR-ER on the News20 dataset, for which the difference is quite significant.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 95,
+                            "text": "News20 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The learned model, i.e., ranking function f (Mc), is then applied to each candidate c for each object o in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the performance of the relational dense captioning task on relational captioning dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An appropriate regularization parameter was selected by means of cross validation, evaluating the marginal likelihood on a test set (analogous to the procedure described in ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "H-FD is slightly more effective for the ClueWeb-B and the Gov2 collections, while being slightly less effective for the Robust04 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 74,
+                            "text": "ClueWeb-B and the Gov2 collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 120,
+                            "end": 139,
+                            "text": "Robust04 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the performance of the proposed approach on publicly available LtR datasets, nding that the improvements measured over the standard algorithm are statistically signi cant.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the dataset 300W-LP, for each real facial image, several synthetic rendered views are generated.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 22,
+                            "text": "300W-LP",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reference captions of the test set are used and the generated caption with the maximum score for each metric is chosen as best-1 (as also used in ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given the target dataset T and all its subgraphs G T = {g 1 , g 2 , \u00b7 \u00b7 \u00b7 }, the aim is to extract the set of significant subgraphs sig(G T )",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our approach with four datasets: an artificially generated dataset, a dataset compiled from a known multiclass text corpus, and two datasets related to one-class problems that received much attention recently, namely authorship verification and quality flaw prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We strictly follow the protocol of small training set to train the model and conduct evaluations on FG-NET.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 106,
+                            "text": "FG-NET",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, we can imagine scaling the different tangent basis vectors according to their relevance in guiding correct searches as determined using a validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let us first describe the experimental setup, i.e., the datasets and baseline algorithms used for evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test collections play an important role in information retrieval research, and a substantial amount of research activity in improving ranking models is focused around ad hoc retrieval runs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test our model on three datasets, as well as on a few of their variations that we discuss below.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To obtain multiple taxonomies per dataset, we use attribute labels and WordNet.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We develop efficient algorithms for topic learning and inference in short texts, and test our approach on three real-world short text data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Documents in a domain-specific collection usually contain very similar terms with similar frequencies, though each document might describe a different topic or object.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to cover a wide range of scenes, we use multiple datasets for training.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although there are differences between the two datasets, such as different lidar calibration parameters and different systems for obtaining ground truth, our approach still achieves the best average performance among evaluated methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Partition-based clustering methods will generally partition any dataset D into a specified number of disjoint subsets, regardless of how appropriate such a partitioning may be to the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The main observation we can draw from the ad hoc retrieval results presented in  is that using a combination of external information sources performs always better than only using the target collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The VERI-Wild dataset and the feature distance adversary scheme is expected to facilitate the largescale vehicle ReID research from the perspective of figuring out the ReID performance bottleneck in the wild.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "VERI-Wild dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The four image datasets are commonly used as distinctive domains in research in visual domain adaptation .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One pattern that is not found using subgraph isomorphism is hence the following:  Paper Author Paper Databases Preliminaries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments are conducted on the Massive Open Online Courses data set containing 2244 courses and their 73,873 reviews, collected from coursera.org.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 73,
+                            "text": "Massive Open Online Courses data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We separately use layer 1 \u223c layer 4 as well as their combination as the translation constraint to test the scene recognition performance of TRecgNet, compared with directly fine tuning pre-trained ResNet18 model; all the experiments are conducted on SUN RGB-D dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 250,
+                            "end": 267,
+                            "text": "SUN RGB-D dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, our experiments show that EM achieves an average solution quality within 95% of optimal for the standard MAP benchmark of protein design problems.",
+                    "annotation_spans": [
+                        {
+                            "start": 114,
+                            "end": 127,
+                            "text": "MAP benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We preprocess the 20-NewsGroups data by removing stop words and words : Cross-domain data sets generated from 20-NewsGroups and Reuters-21578.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 36,
+                            "text": "20-NewsGroups data",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 110,
+                            "end": 123,
+                            "text": "20-NewsGroups",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 128,
+                            "end": 141,
+                            "text": "Reuters-21578",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Items are ranked by their observed frequency in the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We adopt a popular encoderdecoder model 1 pre-trained on the ADE20K dataset  to generate semantic maps.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 75,
+                            "text": "ADE20K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we report the results of two experiments based on synthetic and benchmark datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used two long-standing SR datasets, WordSim353  and MC30 , as well as TSA287  and Atlasify240 4 , the SR dataset we developed as part of the experiment described in Section 5.2.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 49,
+                            "text": "WordSim353",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 55,
+                            "end": 59,
+                            "text": "MC30",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 79,
+                            "text": "TSA287",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 85,
+                            "end": 96,
+                            "text": "Atlasify240",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The document frequency term \u03bb w is calculated as: where N w and N denote the number of documents in the collection that contain w and the total number of documents in the collection, respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training time of our network on a GPU server (configuration: E5-2682 CPU, 32G RAM, and 8 Tesla P100 graphics cards) is 2.5 days for human image matting dataset and 4 days for the DIM dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 118,
+                            "text": "Tesla P100 graphics cards",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 183,
+                            "end": 194,
+                            "text": "DIM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Comparison of all these methods in terms of computational time (in seconds and in logarithmic scale) and RSE on synthetic third-order tensors by varying tensor sizes (a) or given ranks (b), and the BRAINIX data set: running time (c) and RSE (d).",
+                    "annotation_spans": [
+                        {
+                            "start": 200,
+                            "end": 216,
+                            "text": "BRAINIX data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the first dataset, the a priori event distribution was uniform, and in the second dataset events have followed a predefined burst-type a priori distribu-tion.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Shodan itself does not process sensor outputs and despite the size of its dataset, catching everyday objects on Shodan is still not straightforward as servers, routers and network devices constitute the majority of devices on its database.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SegCaps  uses capsules for image segmentation and they achieve the state-of-the-art results on LUNA16 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 109,
+                            "text": "LUNA16 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the following, we will present the results and analyses on each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that all combinations support our original claims that neither DTWD nor DTWI dominates the other, and that on all datasets, DTWA is at least as accurate as the better of DTWD and DTWI, and often more accurate.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted experiments on a vertical search setting that searches a Web query over a large collection of question pages from Yahoo Answers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our SYNDOF dataset enables a network to accurately estimate a defocus map due to the following properties.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "SYNDOF dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since variables are binary, the joint distributions can be computed using the number of variable values in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the above methodology to distribute the Tweets-2011 collection to the participants of the TREC 2011 Microblog track, which is now available to everyone.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 70,
+                            "text": "Tweets-2011 collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 98,
+                            "end": 123,
+                            "text": "TREC 2011 Microblog track",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Entities are evolving in the stream corpus as time goes by, yet semantic features are not capable of portraying the  Cosine similarity between d and the ith citation of e in",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 42,
+                            "text": "stream corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It can be seen that our method achieved superior accuracies to all the other methods in 22 of the 25 data sets tested.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We sample 141 patient records from our simulator with the same sample size as the real-world dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use six publicly available networks from the Stanford Large Network Dataset Collection 8 .",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 89,
+                            "text": "Stanford Large Network Dataset Collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since Graph-based model+Query Likelihood Model and Graph-based model+KL-divergence Model perform similarly on both datasets (shown later in Section 5.4), we only use Graphbased model+Query Likelihood Model in this subtask as an example.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "See Section 3 for more details on the data collection procedure, size of the dataset and the kinds of data included.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The actor professional network is constructed from the Internet Movie Database (IMDB) 1 , where the nodes are the actors/actresses and the edges are the numbers of the co-stared movies between two actors/actresses.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 87,
+                            "text": "nternet Movie Database (IMDB) 1",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As we will show in our experiments, its major drawback is that it leads to the worst test set accuracy and detection of rare categories for all datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The studies  and  appear to be the first to evaluate proximity on TREC data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 80,
+                            "text": "TREC data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is noteworthy that we use the model trained on the DTU dataset  for all our experiments without fine-tuning.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 65,
+                            "text": "DTU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the USPS and Pendigits data sets CH used a burn-in period of 1, 000 updates; on MNIST it was 5, 000 updates; and on 20 Newsgroups it was 20, 000 updates.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 36,
+                            "text": "USPS and Pendigits data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 120,
+                            "end": 133,
+                            "text": "20 Newsgroups",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 37,
+                            "end": 39,
+                            "text": "CH",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 84,
+                            "end": 89,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After removing non-English tweets, the stream dataset contained 56,415,430 tweets published by 9,292,345 users, with an average of 805,935 tweets each day.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The sampling is done such that the items in training, validation, and test sets are non-overlapped.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train the PanoNet CNN, we rely on data from 5 complementary datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For audio data they used the CAL10K dataset, which consists of 10,832 songs, so it is comparable in size to the subset of the MSD that we used for our initial experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 43,
+                            "text": "CAL10K dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 126,
+                            "end": 129,
+                            "text": "MSD",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To analyze the cluster components F learned by GLNP, we plot the components learned from the Iris, Yale Faces and Glass datasets in .",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 128,
+                            "text": "Iris, Yale Faces and Glass datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Building on the earlier studies, they evaluate their models using the previous manual classifications as well as a new collection based on TREC Blog06.",
+                    "annotation_spans": [
+                        {
+                            "start": 139,
+                            "end": 150,
+                            "text": "TREC Blog06",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although we zoom our cameras to focus on the hands, we vary the : ObMan: large-scale synthetic dataset of hand-object interactions.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 71,
+                            "text": "ObMan",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The synthetic datasets were generated by sampling from a 3 -truncated Gaussian mixture model with 5 components, and with samples-sizes in  ].",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, the distribution of training instances among target categories in large-scale datasets such as LSHTC in  follows a fat-tailed power-law distribution .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "[1] based document classification algorithm which does not require any labeled dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, we visualize the output when sampling part of the representations from our model in : the top left shows an image from the Open Images test set, when we store all scales (losslessly).",
+                    "annotation_spans": [
+                        {
+                            "start": 134,
+                            "end": 154,
+                            "text": "Open Images test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Stanford Dogs dataset is another benchmark dataset for fine-grained image categorization recently introduced in .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "Stanford Dogs dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By conducting an appropriate user study, we calibrate and validate the measure against the TREC 2005 Robust Track test collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 129,
+                            "text": "TREC 2005 Robust Track test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Third, for a given test document, an LM is estimated based on the interesting patterns in the document and in the background corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 131,
+                            "text": "document and in the background corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also measure the distribution of statistics of interest, such as player location or velocity, and show that the distribution induced by our generative model better matches the empirical distribution of the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all datasets, we use a three-fold cross-validation and report the mean performance over the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to evaluate the effectiveness of our quilt detection algorithm at identifying spam web pages, we drew a small sample from the set of detected quilts, and extracted the words of each quilted page and their source documents from the ClueWeb09 corpus (again leveraging DryadLINQ).",
+                    "annotation_spans": [
+                        {
+                            "start": 240,
+                            "end": 256,
+                            "text": "ClueWeb09 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The summary should keep alignment with the source collection, which is proved to be significant as proposed in ; (3)Coher-ence.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we present a new traffic dataset (TRAF) comprising of dense and heterogeneous traffic.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 51,
+                            "text": "TRAF",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Nonlinear Support Vector Machines (SVMs) are investigated for visual sex classification with low resolution \"thumbnail\" faces (21by-12 pixels) processed from 1,755 images from the FERET face database.",
+                    "annotation_spans": [
+                        {
+                            "start": 180,
+                            "end": 199,
+                            "text": "FERET face database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed method is able to detect about 5% more categories in the test set compared to the CS-SVM method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For construction of features when modeling, we however looked at our entire dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we plan to similarly characterize collections of essential matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We built a dataset of security prices over a 30-year time frame by joining stock and ETF trading histories to a database of global news headlines (details in supplement).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A similar trend is observed by increasing the number of indices i.e., the parameter k.   shows the comparative analysis of MC-LSH algorithm with UCLUST, CD-HIT and CROP on equal length sequences for datasets DS1 and DS2.",
+                    "annotation_spans": [
+                        {
+                            "start": 208,
+                            "end": 211,
+                            "text": "DS1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 216,
+                            "end": 219,
+                            "text": "DS2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results reported in  and 4 are those averaged over four trials on WSJ and AP datasets, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 89,
+                            "text": "WSJ and AP datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Especially with Protocol 4, which is the most difficult protocol on this dataset, the reduce of the error rates is significant ( by comparing the best with the second best).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focus on sparse binary data and show that our score can be evaluated exactly using a single sparse matrix multiplication, making it possible to apply our algorithm to very large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "proposed ISM which is based on SPADE, by maintaining a sequence lattice of an old database.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "k1 and k3 are tuning constants which depend on the dataset used and possibly on the nature of the queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Its subset, the ISIC Lesion Boundary Segmentation dataset, consists of 2594 images from patient samples presented for skin cancer screening.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 57,
+                            "text": "ISIC Lesion Boundary Segmentation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A random forest is represented by an ensemble of decision trees, and predictions of test dataset labels are combined over all decision trees in the forest.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In both datasets, LNP failed to make predictions for around one third to a half of the data points ( &(e)).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To enable the structured study of such adversarial effects, this paper presents a dedicated dataset of cross-platform social network personas (i.e., the same person has accounts on multiple platforms).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As ablation study, we consider the OIM  baseline and evaluate the separate benefits of the proposed QRPN, QSimNet and QSSE-Net on the CUHK-SYSU dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 134,
+                            "end": 151,
+                            "text": "CUHK-SYSU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When applied to real data sets the large sample limit no longer applies and inconsistent causal relations may result.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted experiments on the Web snippet dataset that has been used in .",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 51,
+                            "text": "Web snippet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As can be seen, even though the logs are from search engines serving different collections and different user needs, there are certain consistent trends.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Contrary to GQA, those datasets are either small in scale  or use only a restricted set of objects and a handful of non-compositional templates .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i to lie in a small interval \u00b1 around  , and \"one-pixel attack\" (1-PA)  on the MNIST and the CIFAR-10 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 110,
+                            "text": "MNIST and the CIFAR-10 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we detail a new methodology for building and distributing Twitter corpora, developed through collaboration between the Text REtrieval Conference (TREC) and Twitter.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 88,
+                            "text": "Twitter corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These music clips are in fact punk rock but are annotated as metal in the GTZAN dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 87,
+                            "text": "GTZAN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These datasets aim to achieve appearance-based head-free and calibration-free gaze estimation in a wide range of scenarios involving significant variations in illumination, head pose, background and so on.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For computational simplicity, 1000 documents are randomly selected from the TREC AP88 dataset and are used as an external information source for both datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 93,
+                            "text": "TREC AP88 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In each experiment, we partition the dataset N into three datasets: a training set N trn , a test set N tst , and a validation set N val .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that we use training set D only to extract the association rules and to compute the metrics, relying on validation set V to learn the solutions (see discussion in Section 5.2).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the efficacy of our approach by applying it to different target networks including a classification network on AffNIST, and two object detection networks (SSD, Faster-RCNN) on different datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "LabelMe Video [2] datasets that illustrate the benefit of our approach to adapt object detectors to video.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 26,
+                            "text": "LabelMe Video [2] datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although lots of models have been proposed for recommending query refinement terms, most have exploited the document collection or previous search logs but not both.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments on this dataset are more convincing, since the test data are collected on random sampled songs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The TotalText dataset  consists of 300 test images and 1255 training images with more than 3 different text orientations: horizontal, multi-oriented, and curved.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "TotalText dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our dataset consists of a set of 3,452 RCs compatible with the above variation, extracted from the Switchboard corpus of spontaneous American English speech.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 117,
+                            "text": "Switchboard corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The latter ones, for instance, would allow one to predict future drought stress levels or topics in document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our work we used publicly available dataset, which contains search logs for 1 month period, collected by Yandex for the Switching detection challenge 1 (Oct 23, 2012 -  .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We see that at the very top Diffusion rEGT : Top retrieved results for three queries from the ROxford dataset using diffusion  and rEGT, both using R-MAC descriptors and SV.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 109,
+                            "text": "ROxford dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Dalal and Triggs show this method produces no errors on the 709 image MIT dataset of ; they describe an expanded dataset of 1805 images.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 81,
+                            "text": "MIT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We argue that they do not need too much tuning in the Introduction, and now we conduct an extensive experiment on MovieLens-100K dataset to show their effects.",
+                    "annotation_spans": [
+                        {
+                            "start": 114,
+                            "end": 136,
+                            "text": "MovieLens-100K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Topics extracted from clustered documents, either deterministically or probabilistically via topic modeling , were only evaluated on a very small collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then evaluate the performance of hashing using DPQ for retrieval on the ImageNet validation set, and on the Caltech-101 and VOC2007 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 98,
+                            "text": "ImageNet validation set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 111,
+                            "end": 143,
+                            "text": "Caltech-101 and VOC2007 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that none of the methods can predict new nodes, therefore, HEP-TH, Cora and Citeseer datasets are not evaluated for these tasks.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 98,
+                            "text": "Cora and Citeseer datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the case of synthetic data sets, we can control the intensity of the relationship between the feature variables and class label (cluster id) effectively by increasing the uncertainty level in the data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset will be applied in the NTCIR-WWW task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By means of transfer learning, no additional annotation cost are required so that this method can be easily extended to other datasets and more scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It's trained on the P \u2032 dataset and achieves nearly perfect accuracy of 97.8%.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The datasets we used in our experiments are as follows:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The bottom panel shows our results for the MIMIC II collection, which consisted of about 30,000 medical records  collected from a hospital intensive care unit.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 62,
+                            "text": "MIMIC II collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, how do the decision boundaries and loss landscapes of adversarially trained models compare to the ones trained on the original dataset?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We trained deep convolutional VAEs with 500 hidden dimensions on images from the CelebA dataset (resized to 64x64).",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 95,
+                            "text": "CelebA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As for the runtime complexity, we note that our algorithm efficiently handles the dataset in  and in the family of datasets DB Exp n .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Participants were recruited from the participant database at Centre for Cognitive Neuroimaging, University of Glasgow.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We randomly choose 500 video clips per class from the YouTube-Bounding Boxes video dataset  to train the motion segmentation network.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 90,
+                            "text": "YouTube-Bounding Boxes video dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also investigated the ability of the models to reproduce the n-block distribution found in the training and test sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As such, also for this sparse data Slim can consider candidates at lower support than Krimp can handle, while for the other datasets Slim only requires a fraction to reach better compression.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Scalability: A P2P network of 0.5M peers (which we believe to roughly corresponds to the number of servers used by a large scale centralized search engine), within which each peer would handle only 50K documents, should be able to index a document collection of around 25 billions pages.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Proposed methods are evaluated by comparing the results with the intensity images captured on the same pixel grid-line of events using online available real datasets and synthetic datasets produced by the event camera simulator.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our first dataset is the USPS dataset which was first collected in  and subsequently widely used in multi-task papers.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 37,
+                            "text": "USPS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generated multiple datasets of c \u2208 {5, 10, 15, 20, 25, 30} and \u03bd \u2208 {3, 5, 10, 20, 50}.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We base our model on SSNMF for lexicon construction by incorporating different factorization schemes for the supervision matrix, which naturally results in a lexicon from the estimated factor matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After this process, we obtain 15595 vocabularies for our relational captioning dataset (11447 vocabularies before this process).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We assume a temporally ordered dataset, t i \u2264 t j for i < j, with initial time t 0 = 0 \u2264 t 1 and end time t N",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results for different values of the evolution threshold for the DBLP and Citation data sets are illustrated in  and (e) respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 95,
+                            "text": "DBLP and Citation data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "More detailed statistics of the dataset are reported in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe the following pattern: ranking of models evaluated on Places205 is consistent with that of models evaluated on ImageNet, indicating that our findings generalize to new datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 74,
+                            "text": "Places205",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 130,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To make our generated records close to the real-world scenario, we learn parameters from a dataset including 141 patient diagnostic records .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is prohibitive for large datasets, particularly when marginal gains \u2206(e|S) are themselves expensive to compute.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We created a test dataset of 1000 randomly selected CS papers published in 2011, the year after the last paper included in the CiteSight corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 127,
+                            "end": 143,
+                            "text": "CiteSight corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the first step, we train our networks using a synthetic training dataset provided in , where synthetic transformations are randomly applied to a single image to generate the image pairs, and thus the images do not have appearance variations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We empirically evaluate MWEM for range queries on restrictions of the Adult data set  to (a) the \"capital loss\" attribute, and (b) the \"age\" and \"hours\" attributes, as well as the restriction of the Blood Transfusion data set  to (c) the \"recency\" and \"frequency\" attributes, and (d) the \"monetary\" attribute.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 84,
+                            "text": "Adult data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 199,
+                            "end": 225,
+                            "text": "Blood Transfusion data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This stream dataset was used to extract breaking news for each time interval on Twitter.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Multiverse outperforms mf on the adom and food but has poor performance on comoda and sushi where it suffers because of the high dimensionality of context in those two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed technique achieves outstanding performances on Ac-tivityNet Captions dataset in most metrics.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 89,
+                            "text": "Ac-tivityNet Captions dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train our models we use a dataset D = {(I, y, t)} which contains tuples (I, y, t) composed of an image I, a sentence y, and the corresponding POS tag sequence t. Since it is not feasible to annotate the \u223c .5M captions of MSCOCO with POS tags, we use an automatic part-ofspeech tagger.",
+                    "annotation_spans": [
+                        {
+                            "start": 224,
+                            "end": 230,
+                            "text": "MSCOCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use word embeddings learned from all these methods to evaluate the word pair similarities on several datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "studied an English-Spanish bilingual classification task for the International Labor Organization (ILO) corpus, which had 12 categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 110,
+                            "text": "International Labor Organization (ILO) corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For training samples with multiple keywords, such as the  : Semantic segmentation results with different initial heuristic cues on the PASCAL VOC validation set.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 160,
+                            "text": "PASCAL VOC validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show, using the INRIA Person dataset, that estimates of configuration significantly improve the accuracy of a discriminative pedestrian finder.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 39,
+                            "text": "INRIA Person dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All these datasets are easily mined and processed using a minsup threshold of 1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Most of previous work on traditional recommendation focus on collaborate filtering based on a sufficient collection of user profiles, e.g. the famous movie recommender system -\"moviefinder\" .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The N-CARS dataset  proposes a binary classification task: deciding whether a car is visible or not using a 100 ms sequence of events.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "N-CARS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first learned grid-structured pairwise MRFs from the CEDAR dataset of handwritten digits, which has 10 datasets (one for each digit) of 16\u00d716 binary images.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 69,
+                            "text": "CEDAR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our gHMM model as well as the baselines are trained on the training sets and finally evaluated on the TREC test set containing 2947 queries and",
+                    "annotation_spans": [
+                        {
+                            "start": 102,
+                            "end": 115,
+                            "text": "TREC test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows that on Wondir dataset the question part is more important than the answer part for Q&A retrieval, which supports the observation of previous research .",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 28,
+                            "text": "Wondir dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reason is that most of the poses in the Human3.6 dataset are in a standing position, resulting in a worse performance on the \"Sitting\" and \"SittingDown\" actions.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 60,
+                            "text": "Human3.6 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "10% of the bAbI training set was held-out to form a validation set, which was used to select the optimal model architecture and hyperparameters.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 28,
+                            "text": "bAbI training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Wang and Li  investigated semantically annotated Wikipedia XML corpus to improve retrieval performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 69,
+                            "text": "Wikipedia XML corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Reuters-21578 collection contains Reuters news articles from 1987 .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 28,
+                            "text": "Reuters-21578 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On two challenging datasets (basketball and soccer), we show that we are able to produce more accurate forecasts than previous methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After running LDA (K=20) on NIPS, we used the inferred mixtures \u03b8 jk to separate the corpus into 20 different sets of documents corresponding to the 20 topics.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 32,
+                            "text": "NIPS",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimenter wishes to augment the existing dataset of absolute labels by adding comparison labels",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A further improvement is observed on the interleaving dataset when the greedy scheduling algorithm was used with the predictions from PGBM.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 61,
+                            "text": "interleaving dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the proposed methods on the PASCAL VOC 2012 semantic segmentation benchmark  which contains 20 foreground object classes and one background class.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 87,
+                            "text": "PASCAL VOC 2012 semantic segmentation benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "provided ground-truth time-series datasets by mimicking properties of real climate and weather datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 104,
+                            "text": "climate and weather datasets.",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Definition: An unlabeled dataset is a collection of search goals without success labels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SRW outperforms the RWR on cond-mat and hep-lat on MAP and AUC, but becomes worse on hep-ph. MERW fails to improve the performance of RWR on most of our datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While we still report quantitative results on objects for completeness, the ground truth errors prevent us from drawing strong conclusions from reconstruction metric fluctuations on this dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, the majority of face recognition systems evaluated on LFW currently make use of a preprocessed version of the data set known as LFW-a,  where the images have been aligned by a commercial fiducial point-based supervised alignment method .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the AMAZON, CUL, BX and ML-IMDB datasets, the words that appear in the item descriptions were collected, stop words were removed and the remaining words were stemmed to generate the terms that were used as the item features.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 44,
+                            "text": "AMAZON, CUL, BX and ML-IMDB datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Take the Ta-Feng dataset as an example, when compared with HRMAvgAvg with dimensionality set as 50, the relative performance improvement by HRMMaxAvg and HRMAvgMax is around 13.6% and 9.8%, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 24,
+                            "text": "Ta-Feng dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To address these problems, we introduce a large-scale dataset called \"COIN\" for COmprehensive INstructional video analysis.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, our FM-IQA dataset is much larger than theirs (e.g., there are only 2591 and 1449 images for  and  respectively).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let I be a set of items and let D be a dataset over I, cover a cover algorithm, and F a collection of candidate patterns",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data set and the online platform share similar characteristics with respect to fluctuations of the article collection and user set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, GoogleNet  trained on Market-1501 dataset achieves rank-1 recognition rate of only 5.0% on PRID2011.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 54,
+                            "text": "Market-1501 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We validate our sampling algorithm using several synthetic datasets, and also demonstrate promising unsupervised segmentation of data from the CMU motion capture database .",
+                    "annotation_spans": [
+                        {
+                            "start": 143,
+                            "end": 170,
+                            "text": "CMU motion capture database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The main goal of the SemSearch Challenge is to create evaluation collections for the task of Ad-hoc Object Retrieval  on the Web of data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When taking a risk-inclined approach, i.e. b < 0, the MAP and MRR for all five collections degrade.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A bar plot of the validation mAPs of NUS-WIDE dataset for various values of \u03bb 2 and \u03bb 3 is given in .",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 53,
+                            "text": "NUS-WIDE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first consider a data set of 300 handwritten 'p's recorded using an INTUOS 3 WA-COM digitisation tablet http://www.wacom.com/productinfo/9x12.cfm, providing trajectory data at 200Hz.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Efficiency: We compare the running time of our algorithms and the baselines on Amazon datset, and similar results can be observed on other two datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 92,
+                            "text": "Amazon datset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These collections are diverse in both sizes and content, which facilitate a thorough evaluation of our proposed CRTER model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "with 3D convolutions pretrained on Kinetics dataset  to extract 3D features, which has shown its superior performance on video classification tasks .",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 51,
+                            "text": "Kinetics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A tessellation Y of a bounded domain W \u2282 R d is a finite collection of closed polytopes such that the union of the polytopes is all of W , and such that the polytopes have pairwise disjoint interiors .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, huge data such as collections of online books at Amazon TM , image repositories at Flickr TM or Google TM , or personal health records  are becoming ever more common and thus pose a challenge to research on interpretable matrix factorization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each song s in our dataset, we attempt to collect two lists of social tags from Last.fm using their public data sharing AudioScrobbler 3 website.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the ImageNet-1k dataset is quite large and the training from scratch is extremely time-consuming, we conduct more ablation studies on CIFAR10  classification benchmark to deeply analyze ACNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 26,
+                            "text": "ImageNet-1k dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 137,
+                            "end": 170,
+                            "text": "CIFAR10  classification benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This simultaneous loss of power and predictive accuracy is characteristic of modeling large, heterogeneous datasets arising from aggregating multiple subpopulations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparing the CPU time and memory of the ranking algorithms for the given query and web page on MSN data sion+ on real datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 104,
+                            "text": "MSN data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Distributed events are collections of events taking place within a small area over the same time period and relating to a single topic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is a dataset which contains handobject interactions with an emphasis on the variability of objects and backgrounds.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The performance of the algorithm is tested on both synthetic and real world datasets, as outlined in Section 5.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Simplex traces for the plant phenotyping application using for four randomly selected 5% of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our local copy of the Wikipedia database, which is described in Section 3.1, is used to compute the network features.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 40,
+                            "text": "Wikipedia database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All detection baselines have also been evaluated using the test set of the MS COCO 2018 dataset, where the annotations are not accessible for the evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 95,
+                            "text": "MS COCO 2018 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The mnist-back-rand data set was generated by filling the image background by random pixel values, while the mnist-back-image  test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 135,
+                            "text": "mnist-back-image  test set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 4,
+                            "end": 28,
+                            "text": "mnist-back-rand data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Many of the datasets used in data mining are expressed as sets of multidimensional vectors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Considering temporal effects enabled MNB-W and MNB-S to perform better than MNB on the TV data set; however, increasing the number of training tweets is not effective for achieving their higher classification accuracy, as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 98,
+                            "text": "TV data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The common feature in these (and other similar) datasets is that the data is \"grouped\" naturally which correspond to the number of participants in the acquisition study -these",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This lack of validation data sets also makes it difficult to asses the impact of model mismatch and unobserved, highly-correlated noise sources (\"common input\").",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Ionosphere radar dataset (Blake and Merz, 1998) consists of a phased array of 16 high-frequency antennas.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 28,
+                            "text": "Ionosphere radar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SATM is much faster than BTM on Snippet dataset, but requires much more time than BTM on BaiduQA dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 47,
+                            "text": "Snippet dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 104,
+                            "text": "BaiduQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With the proposed Parsing R-CNN, we achieve state-ofthe-art performance on several datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that the NYTimes dataset is 3 orders of magnitude larger than those considered in the HDP split/merge work of .",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 32,
+                            "text": "NYTimes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The preprocessed dataset contains 6,072 documents, 9,080 unique words and 324,298 word tokens in total.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we reported the average the forecasting error for all 8 baselines for the Top500Prop and Top1000Prop datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 114,
+                            "text": "Top500Prop and Top1000Prop datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following this rule, in COCO-Stuff dataset, we create a new zero-label class split by selecting 15 classes as unseen and the rest of the 167 classes as seen classes as they appear in ImageNet 1K which was used to pretrain ResNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 42,
+                            "text": "COCO-Stuff dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 183,
+                            "end": 194,
+                            "text": "ImageNet 1K",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the 20 Newsgroups (11,200 documents, 2,000-word vocabulary) and RCV1  (800,000 documents, 10,000-word vocabulary) datasets with the same preprocessing as in .",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 129,
+                            "text": "20 Newsgroups (11,200 documents, 2,000-word vocabulary) and RCV1  (800,000 documents, 10,000-word vocabulary) datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ProbE is designed so that the massive out-of-core training data sets are accessed sequentially for I/O efficiency reasons.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Corpora used in the two Microblog collections are much larger than the TREC-8 collection, so the judgment pools represent a much smaller percentage of the collection than for the TREC-8 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 45,
+                            "text": "Microblog collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 71,
+                            "end": 88,
+                            "text": "TREC-8 collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 179,
+                            "end": 196,
+                            "text": "TREC-8 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also provide various ablative studies to validate the relevance of the MuRel cell and the iterative reasoning process, and show that MuRel is highly competitive or even outperforms state-of-the-art results on three of the most common VQA datasets: the VQA 2.0 dataset , VQA-CP v2  and TDIUC .",
+                    "annotation_spans": [
+                        {
+                            "start": 237,
+                            "end": 249,
+                            "text": "VQA datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 255,
+                            "end": 270,
+                            "text": "VQA 2.0 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 273,
+                            "end": 282,
+                            "text": "VQA-CP v2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 288,
+                            "end": 293,
+                            "text": "TDIUC",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Luckily, current GPUs, paired with a highly-optimized implementation of 2D convolution, are powerful enough to facilitate the training of interestingly-large CNNs, and recent datasets such as ImageNet contain enough labeled examples to train such models without severe overfitting.",
+                    "annotation_spans": [
+                        {
+                            "start": 192,
+                            "end": 200,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using query logs as the source corpus significantly improved the quality of suggestions, at least as perceived by the user.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a training dataset consisting a set of answers together with ground-truth quality scores and votes on them, we determine the model parameters \u0398 =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on four standard datasets: CUB200 , Cars-196 , Stanford Online Products (SOP)  and In-Shop Clothes Retrieval (In-Shop) .",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 56,
+                            "text": "CUB200",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 59,
+                            "end": 67,
+                            "text": "Cars-196",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 70,
+                            "end": 94,
+                            "text": "Stanford Online Products",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 106,
+                            "end": 131,
+                            "text": "In-Shop Clothes Retrieval",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the experiments we randomly split each data set into three separate data sets: training, validation and testing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to the technique that the Twitter dataset was generated with, almost all paths in that graph are also shortest paths.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 45,
+                            "text": "Twitter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results in the second half of  indicate that incorporating unigrams from the top retrieved documents translates into a signi cant increase in retrieval accuracy of SWDM on ROBUST04 and GOV2 collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 176,
+                            "end": 205,
+                            "text": "ROBUST04 and GOV2 collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We outperform our previous effort as well as alternative attention baselines on EPIC Skills  and our newly collected dataset, BEST, and present a comprehensive evaluation of the contribution of rank-aware attention.",
+                    "annotation_spans": [
+                        {
+                            "start": 126,
+                            "end": 130,
+                            "text": "BEST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TF\u00d7IDF assigns high weights to words that occur frequently in a given document and infrequently in the rest of the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While While the speeds of pruning are slower than in the cases of R 2 datasets, the proposed algorithm still runs much faster than the EM algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike the former two datasets, the ratings scale of MovieTweetings is 1-10, and it is extremely sparse.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Quantitative comparison of panorama fitting on the real dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the Sowerby dataset and a subset of the Corel dataset we also compare our model with two CRF models that operate at pixel-level.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 25,
+                            "text": "Sowerby dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 46,
+                            "end": 59,
+                            "text": "Corel dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In these cases, it is even more important that the learning method can handle large-scale datasets, with time complexities that are linear to both the number of instances and the number of classes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The result of our experiments on a blog collection shows that our model is able to quantify the propagation of topics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On two holistic re-ID datasets, Market-1501 and DukeMTMC-reID, there do exist some images which contain either partial pedestrian or oversized bounding boxes.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 43,
+                            "text": "Market-1501",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 48,
+                            "end": 61,
+                            "text": "DukeMTMC-reID",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset allows us to compare our method against ground truth which was obtained using high-standard GPS/IMU sensors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider our results as especially promising since the LOD test collections that are today available are noisy and incomplete, and since we expect both the quality and the coverage of LOD datasets to rapidly improve in the future.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 78,
+                            "text": "LOD test collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 187,
+                            "end": 199,
+                            "text": "LOD datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Overall, although the theoretical analysis shows that PRF methods should satisfy the \"distribution effect\" constraint, it does not substantially affect the retrieval performance in the AP and the Robust collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For training, we leverage the AMOS dataset  and build a large collection of outdoor images with the corresponding timestamps of when the photos were taken.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 42,
+                            "text": "AMOS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The purpose of the experiment on synthetic datasets is to verify the performance of calibration for the setting where Bayes-optimal 0-1-c risk is available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focused our experiments on 124,878 reviews associated with Amazon products from the Multi-Domain Sentiment Dataset 6 .",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 119,
+                            "text": "Multi-Domain Sentiment Dataset 6",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A typical test collection consists of a set of queries/requests, a collection of documents, and relevance judgments for querydocument pairs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We rescaled the images only for CIFAR10 and SVHN datasets as a data augmentation, since they have more rich high level features compared to MNIST and F-MNIST.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 57,
+                            "text": "CIFAR10 and SVHN datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 140,
+                            "end": 145,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 150,
+                            "end": 157,
+                            "text": "F-MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the protocol used in , we conduct experiments on the 36 UCR [30] time series datasets to evaluate performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 95,
+                            "text": "36 UCR [30] time series datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It starts with the normalisation (as listed in Algorithm 2) of weights w for each edge in the initial SHReC built from the document collection only (Lines 1-3 of Algorithm 1), so that the total weight of all edges originating from a term node sums up to 1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Despite the contextual information coincides with the item attributes, we use this dataset to verify the scalability of our model due to the lack of large-scale explicit contextual datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We do not use optical flow on this dataset due to the heavy computation and instead combine results of two RGB models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is true even, e.g., in the Genes dataset, where a higher number of clusters is selected by our algorithm (and thus a higher penalty for activating them is paid).",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 45,
+                            "text": "Genes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The following is the procedure we used to generate general synthetic datasets in which co-located event instances were randomly distributed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We repeat the same experiment with the NLP RTE data set from .",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 55,
+                            "text": "NLP RTE data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To be specific, L IRG outperforms Rocket, the best competing method, by 0.93% to 1.05% on different datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Isolet dataset was collected from 150 speakers uttering all characters in the English alphabet twice, i.e., each speaker contributed 52 training examples (in total 7797 examples 4 ).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "Isolet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also use seven real-life datasets, summarized in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first implicit feedback dataset we use is from the mobile app recomender frappe which contains 61,465 implicit feedback of 4073 Android applications by 953 users.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The intraclass variation of CUB-200-2011 is smaller than that of ImageNet-1k, because all classes of this dataset belong to birds.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 40,
+                            "text": "CUB-200-2011",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 65,
+                            "end": 76,
+                            "text": "ImageNet-1k",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\"One-pixel attack\" does not necessarily find good solutions for all the studied datasets, however, SparseFool -as",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the efficiency and effectiveness of GRLSI and GNMF on two datasets 5 : Wikipedia dataset which consists of articles downloaded from the English version of Wikipedia and",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 98,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second dataset is comoda  which contains 2296 ratings of 1232 movies by 121 users.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Panoptic Segmentation: In , the author gave a benchmark of panopic segmentation by combining instance and semantic segmentation models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experimental validation on simulated dataset and real-world datasets are shown in Section 4.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Realisability: If a collection has any relevant documents, then a metric is realisable if it is possible to achieve the maximum value for that metric.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although we cannot yet make statements about the EaaS model in general, the TREC 2013 Microblog collection does offer an existence proof that a high-quality retrieval test collection can be constructed using this new method.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 106,
+                            "text": "TREC 2013 Microblog collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, the table reports the average fraction of remaining nodes and edges after applying our method, respectively the QPBO method  (lower is better).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For comparison with previous models, we have computed the FID scores for CIFAR-10 and CelebA datasets using the official Tensorflow implementation, and for computing the FID scores of STL-10 dataset, we have used the Chainer implementation used by SNGAN .",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 101,
+                            "text": "CelebA datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 184,
+                            "end": 198,
+                            "text": "STL-10 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 81,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We identified near duplicates in the AQUAINT collection with a variant of Broder's duplicate detection algorithm , and only considered documents with the highest possible similarity to be duplicates of each other, i.e. all shared shingles are identical.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 55,
+                            "text": "AQUAINT collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the TrecQA dataset was ultimately constructed from TREC evaluations, the provenance information connecting answer candidates to their source documents does not exist.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 27,
+                            "text": "TrecQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The comparison results demonstrate the effectiveness of our approach and show that our approach outperforms the state-of-the-art methods on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Root-mean-square-error (RMSE) and negative log-likelihood (NLL) of exact GPs and approximate GPs on UCI regression datasets using a constant prior mean and a Mat\u00e9rn 3/2 kernel with a shared lengthscale across all dimensions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This paper investigates the 37 cluster quality measures Q(\u00b7) described by Bolshakova and Azuaje , who used them with random subset selection to assess cluster stability (i.e., the degree to which \"similar\" datasets gave \"similar\" clustering results).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Position+Stopword combination performed reasonably well on both datasets, only requires local information, and  is simpler than any kind of relevance-based features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Without bells and whistles, it achieves 2.6% and 1.4% higher mask AP than Mask R-CNN and Cascade Mask R-CNN baselines respectively on the challenging COCO dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 150,
+                            "end": 162,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare gradient (KDES-G), color (KDES-C), and shape (KDES-S) kernel descriptors to SIFT and several other state of the art object recognition algorithms on four publicly available datasets: Scene-15, Caltech101, CIFAR10, and CIFAR10-ImageNet (a subset of ImageNet).",
+                    "annotation_spans": [
+                        {
+                            "start": 194,
+                            "end": 202,
+                            "text": "Scene-15",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 204,
+                            "end": 214,
+                            "text": "Caltech101",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 216,
+                            "end": 223,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 229,
+                            "end": 245,
+                            "text": "CIFAR10-ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose two methods for exact Gaussian process (GP) inference and learning on massive image, video, spatial-temporal, or multi-output datasets with missing values (or gaps) in the observed responses.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since many graduate students would like to explore IR in novel areas, it's likely they will need to build their own test collection to get the best measurements for their research.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct both quantitative and qualitative experiments on the KITTI dataset concerning the capability of predicting future motion, images, depth maps, and semantic segmentation.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 77,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use Animals with Attributes dataset , which consists of 30, 475 images of 50 animal classes, with 85 class-level attributes 2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 38,
+                            "text": "Animals with Attributes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To handle those sequences, we also store the majority class (i.e., the class of the largest population) in the training data set at the root node.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "indicate recipient-sender familiarity (note that spam e-mail lters were in place before collection).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we evaluate the performance of state-of-the-art deep convolutional networks on the IP102 dataset, including AlexNet , GoogleNet , VGGNet-16 (VGGNet) , and ResNet-50 (ResNet) .",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 113,
+                            "text": "IP102 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "3.3 is denoted as Label-smooth, and we show the effect of varying \u03bb on the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate how the recommendation accuracy varies with co-clustering granularity, we apply Bregman co-clustering  on the MovieLens dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 122,
+                            "end": 139,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Not everyone can talk TREC, CLEF, INEX, or NTCIR into running a track to build the collection you need.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All results are reported in mAP % computed over the 16 unseen categories in the entire COCO validation set.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 106,
+                            "text": "COCO validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Assume that data have been split into two subsamples: the first data set D n0,m0 = {X + 1 , . . .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although in most cases, WTC framework prevails over the previous methods, it seems that WTC is less stable comparing to PTC: in the Sina Weibo dataset, WTC is even worse than Topic Cascade when K = 2 and K = 3.",
+                    "annotation_spans": [
+                        {
+                            "start": 132,
+                            "end": 150,
+                            "text": "Sina Weibo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This click dataset contains more than 37M clicks on 6M questions by 20M users following around 20M queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "compares the performance between AttnGAN and DM-GAN with respect to the FID on the CUB and COCO datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 104,
+                            "text": "CUB and COCO datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the same Gaussian mixture model but larger datasets, we present running times and times to convergence (or, if convergence is slow, the maximum number of iterations) in  (a) and (b), respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the Fscores (in percentages) of the four algorithms on the six UCI data sets under different amounts of constraints respectively, from which we can clearly see the superiority of our PMF algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Tests using the new collection show the impact of query ambiguity on an IR system: it is shown that conventional systems are incapable of dealing effectively with such queries and that current assumptions about how to improve search effectiveness do not hold when searching on this common query type.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Lastly, we note that the gap is not caused by the scale of dataset because ADL rarely fails for ImageNet-1k classes sharing similar background statistics (e.g., various breeds of dogs).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They show that the base hypotheses perform poorly on the mixture test set, which justifies the need for adaptation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Two examples of historical manuscripts digitized with scales to allow accurate recording of size Figure 6: Samples of images from the classification dataset hint at how difficult this problem is SEARCH ALGORITHMS.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The contextualization vector g k from Equation 1 is computed off-line for each and every XML document in the Wikipedia collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 129,
+                            "text": "Wikipedia collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train our model with the augmented training set and test it on the validation set to compare with other methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To compare with the benchmark results in , we tested our model on the multi-label task with the LYRL04 split  on RCV1, in which more than one out of 103 categories can be assigned to each document.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each cluster center can be used as a representative point to summarize the characteristics of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on the TREC-9 Ohsumed and Reuters-21578 datasets, show that the proposed approach is effective on AUC, AUP, mT9P and PR ranking measures especially when there are much less relevant examples than irrelevant ones.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 60,
+                            "text": "TREC-9 Ohsumed and Reuters-21578 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our model on the challenging COCO-stuff  and PASCAL-VOC 2012  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 82,
+                            "text": "COCO-stuff  and PASCAL-VOC 2012  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\u2022 Paper datasets: These are two multivariate multiple-response regression datasets from paper industry .",
+                    "annotation_spans": [
+                        {
+                            "start": 2,
+                            "end": 16,
+                            "text": "Paper datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Paleo dataset which is studied in this paper is the collection of density of \u03b4 18 O, a radio-active isotope of Oxygen, in four caves across China and India.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "Paleo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform experiments on the DUC2001 and DUC2002 datasets and the results demonstrate the good effectiveness of CollabSum.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 58,
+                            "text": "DUC2001 and DUC2002 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset contains 720 639 training, 155 036 validation and 154 045 test recipes, containing a title, a list of ingredients, a list of cooking instructions and (optionally) an image.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first evaluate the performance of SparseFool, JSMA, and \"one-pixel attack\" on differ-ent datasets and architectures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset consists of purchasing histories of 56,028 users and contains the gender, age, and marital status (demographic attributes) of all the users.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We collect evidence of how knowledgeable group g is via all documents in the collection and obtain p(t|g) = d p(t|d)p(d|g), where p(d|g) is the probability of document d given group g.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We denote as J(x, y, \u03b8), the cross-entropy between the prediction of the model for an input x and a label y.  illustrates a targeted attack on the ImageNet dataset, against an Inception v3 model .",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 163,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted experiments using Reuters Corpus Vol. 1 (RCV1), a collection of over 800K news articles collected from the Reuters news wire.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 59,
+                            "text": "Reuters Corpus Vol. 1 (RCV1)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a consequence, we can take the observation of such average silhouette coefficient values as evidence that structure in the original dataset has been destroyed at a significance level of q/m.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the first data collection, we manually selected seven documents from the RST Treebank",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 89,
+                            "text": "RST Treebank",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Starting from the traditional CNN architecture, we added one component of the proposed framework at a time and evaluated the novelty detection performance on the Caltech-256 dataset as a case study.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 181,
+                            "text": "Caltech-256 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, unlike other machine learning algorithms, whose development has been greatly fostered by a large amount of available benchmark datasets, causal discovery algorithms are notoriously difficult to be systematically evaluated because few datasets with known ground-truth causal relations are available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider a wide range of benchmark and real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experiment with the TREC datasets of the Web 2009 (queries 1-50) and",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 36,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There have been a variety of instructional video datasets proposed in recent years.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Modern applications of machine learning (ML) deal with increasingly heterogeneous datasets comprised of data collected from overlapping latent subpopulations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, for the majority of problems in the UCR time series classification data sets , time domain classifiers work very well.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 85,
+                            "text": "UCR time series classification data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This exception occurs because there are many unseen relevant documents in the TD2004 dataset and, when the user model assumptions do not hold, the baseline's condensing strategy  may be more effective because it does not rely on these assumptions.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 92,
+                            "text": "TD2004 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we augment KITTI with more instance pixel-level annotation for 8 categories, which we call KITTI INStance dataset (KINS).",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 128,
+                            "text": "KITTI INStance dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this purpose we created a simple dataset which is composed of a single sequence of multiple walking cycles performed by the same subject from the CMU mocap dataset 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 150,
+                            "end": 167,
+                            "text": "CMU mocap dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "that all of the operations introduced in this paper-not just the update-have low computational complexity and work in a pure streaming-data setting, with no data warehousing and low storage overhead.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generate such a table for each document in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Among existing methods, those based on domainadversarial training of deep networks  achieve the current state of the art on many benchmark domain adaptation datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Quantitative results on aPascal dataset for both the object segmentation (a), and attributes segmentation (b) tasks.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 41,
+                            "text": "aPascal dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The DBLP dataset does not contain most information about article contents and citation relationships, instead it consists of metadata about each article such as authors, publication venues, etc.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "DBLP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Data A high-quality databased from Symcat 2 database was used.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 52,
+                            "text": "Symcat 2 database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, a major weakness of these approaches is the need of collecting large-scale labeled datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to previous state-of-the-art, our defense method is far more capable of effectively handling a variety of image resolutions, large and small image perturbations, and large-scaled datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These datasets include WordSim353 (WS353) , WordSim Relatedness (WS Rel) , Turk ,",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 41,
+                            "text": "WordSim353 (WS353)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 44,
+                            "end": 72,
+                            "text": "WordSim Relatedness (WS Rel)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 75,
+                            "end": 79,
+                            "text": "Turk",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We selected topics from the collection with contemporary relevance where prior studies have reported that participants found the tasks at least moderately interesting and difficult .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed approach outperforms the state-of-the-art methods on two widely used AU detection datasets in the public domain.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 103,
+                            "text": "AU detection datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As for the ImageNet-C benchmark, instead of using the compressed ImageNet-C images provided in , we evaluate the models on corruptions applied in memory,  and observe that AutoAugment also achieves the highest average corruption test accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 31,
+                            "text": "ImageNet-C benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike in the siamcompetition2007 dataset, the main contribution comes from label regularizations, and entropy regularization is not useful in this dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 41,
+                            "text": "siamcompetition2007 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show effectiveness of our algorithm on three real world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, to the best of our knowledge, all of the existing vehicle ReID datasets [26]  are captured under constrained conditions, and generally have limitations in the following aspects: 1)",
+                    "annotation_spans": [
+                        {
+                            "start": 67,
+                            "end": 80,
+                            "text": "ReID datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that gCTM has almost the same training time on NIPS and 20Newsgroups data sets, due to their small sizes.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 83,
+                            "text": "NIPS and 20Newsgroups data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The traditional approach is to mine concise representations for collections of patterns, either lossless, such as nonderivable  itemsets, or lossy, as for self-sufficient itemsets  and probabilistic summaries .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Blitzer et al.'s book (BOO) and DVD datasets [4] each contains 1000 positive and 1000 negative customer reviews of books or movies, and can therefore be used to evaluate our algorithm's ability to cluster by sentiment.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 44,
+                            "text": "Blitzer et al.'s book (BOO) and DVD datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform extensive experiments on four referring image segmentation datasets: UNC , UNC+ , G-Ref  and ReferIt .",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 83,
+                            "text": "UNC",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 90,
+                            "text": "UNC+",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 93,
+                            "end": 98,
+                            "text": "G-Ref",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 104,
+                            "end": 111,
+                            "text": "ReferIt",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Magellan Venus data set is a typical example: between 1991 and 1994 the Magellan spacecraft transmitted back to earth a data set consisting of over 30,000 high resolution (75m per pixel)",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 27,
+                            "text": "Magellan Venus data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MDC method iteratively performs agglomerative clustering over terms (e.g., word tokens) and conglomerate clustering over documents (e.g., web pages or citations in a collection) at random, and assigns documents to more accurate clusters based on the joint probability distribution of terms and documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test collections drive much of the research in information retrieval.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, based on the model trained only on the KITTI dataset, we directly test the model on the Ford dataset without any further training or fine-tuning.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 58,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 94,
+                            "end": 106,
+                            "text": "Ford dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Collecting such a training set provides a practical alternative to the more laborious one of collecting full image ground-truth.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Generally, a crawler continuously updates the document collection of the search engine by discovering and fetching new and modified documents from the Web, and deleting documents that are no longer available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compressed collections used for evaluation are significantly larger than internal memory, so our timings account for disk seek and read latency as they are the dominant cost in document retrieval.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed algorithm iteratively finds the best direction on the unit sphere 2 , and then from the available samples in dataset selects the sample with the smallest angle to the found direction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent multiview datasets of humans, such as Human 3.6M , only provides 4 RGB views and a single low-resolution depth (TOF) sensor, which is insufficient for the task at hand; therefore we collected our own dataset with 20 subjects.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 55,
+                            "text": "Human 3.6M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Zobel  showed that TREC collections can still be used to provide an unbiased comparison.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 35,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We proposed and evaluated eight new heuristics and two learning-to-rank based strategies, comparing them against three state-of-the-art techniques, in various datasets and scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, to analyze the substantial difference in our accuracy gain between two datasets, we investigate our failure examples from ImageNet-1k experiments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On both datasets, TFiLM layers improve the performance of the SmallCNN architecture; the resulting TFiLM model performs at or near the level of state-of-the-art methods from the literature.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have shown that PF-GCRF provides an accuracy improvement over GCRF that can even be larger than 10%, on real-world high impact datasets, where even a small improvement is highly appreciated.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the two datasets represent realistic classes of timeevolving document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Density is the total number of traffic agents per Km in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We assume the availability of a held-out validation set only for simplifying the exposition.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, through a comprehensive set of empirical experiments using datasets collected from last.fm and whrrl.com, a number of interesting findings are obtained, including: 1) the social influence is beneficial for recommender systems and item recommendation made based on SIS (which integrates user behavior, social influence and item content) achieves the best performance; 2) user behavior (in form of location check-ins) in whrrl.com shows the spatial clustering phenomenon, and items (even with similar contents) in different spatial clusters tend to belong in different latent topics; 3) users making decisions mainly based on their own preferences, although friends' opinions may affect the decision making to certain degree, i.e., the probability for a user to follow an uncommon preference of his friends is relatively small",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results of RankSVM, RankBoost, AdaRank and FRank are reported in the Letor data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 87,
+                            "text": "Letor data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To verify the advantage of this approach, we compared the performance to that of a fixed value of \u03b4, where \u03b4 =0 .01, 0.1, 1, 10 to investigate the impact of \u03b4 on the overall performance on the WL-PRID 2011 and WL-iLIDS-VID datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 193,
+                            "end": 231,
+                            "text": "WL-PRID 2011 and WL-iLIDS-VID datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The physician evaluates the 100 records without knowing the source of the records (the simulator or the real dataset).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The weights computed by our proposed way have a \"scale free\" property, i.e. it is a relative similarity measure insensitive to the distribution of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection can be useful for further research on sentiment analysis, detecting and ranking triggers of sentiment spikes and sentiment prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The odometry ground-truth provided in the dataset contains drift and inaccuracies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further test RIM's unsupervised learning performance on two molecular graph datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A more powerful strategy would be to iteratively select the best addition out of all candidates-which naively, however, quickly becomes infeasible for larger databases or candidate collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With the aim to facilitate research on this problem, we describe a collection of tweets that can be used for detecting and ranking the likely triggers of sentiment spikes towards different entities.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The PASCAL-S dataset  contains 850 challenging images selected from the PASCAL VOC 2009 segmentation dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "PASCAL-S dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 72,
+                            "end": 108,
+                            "text": "PASCAL VOC 2009 segmentation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then we evaluate the DPP * /SDPP * rules and Dome on (c) the ADNI data set; (d) the Olivetti Faces data set ; (e) Yahoo web pages data sets  and (f) a synthetic data set whose entries are i.i.d. by a standard Gaussian.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 74,
+                            "text": "ADNI data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 84,
+                            "end": 107,
+                            "text": "Olivetti Faces data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 139,
+                            "text": "Yahoo web pages data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To scale GP to large-scale datasets, approxima-tion methods have been proposed .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Various digital images became a common part of both public and enterprise data collections and there is a natural requirement that the retrieval should consider more the actual visual content of the image data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Fashion-200k dataset from , MIT-States dataset , and a new synthetic dataset for image retrieval, which we call \"CSS\" (color, shape and size), based on the CLEVR framework .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 20,
+                            "text": "Fashion-200k dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 28,
+                            "end": 46,
+                            "text": "MIT-States dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset was obtained from 32 buckets of users and covering a one-month period, where the first three weeks forms the training set and the last week was held out for testing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiment results on TDT2, Reuters-21578, and COIL-20 data sets are shown in , 8, 9, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 68,
+                            "text": "TDT2, Reuters-21578, and COIL-20 data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We summarize the experimental results of mAP performance of the compared algorithms on the two data sets in , and illustrate the details of the topn precision results in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the S3DIS dataset, each point is represented by a 9-dim feature vector (XYZ, RGB and normalized coordinates as to the room).",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 21,
+                            "text": "S3DIS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We ran a standard sketch-and-solve approximate SVD algorithm (see Section 3) on SNAP/AMAZON0302, an Amazon product co-purchasing dataset , and achieved very good low-rank approximation error in both norms for k = 30:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Three different evaluation measures are defined and very high performance scores are obtained for four language pairs of the EURO-PARL dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 125,
+                            "end": 142,
+                            "text": "EURO-PARL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows that on the TREC MB 2011 collection there is a general trend to prefer lower values of b, possibly because of the very small average document length, which, in turn, renders the deviation from the average length close to one.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 41,
+                            "text": "TREC MB 2011 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, the VizWiz study  found that up to 21% of these questions involve reading and reasoning about the text captured in the images of a user's surround- : Examples from our TextVQA dataset that require VQA models to understand text embedded in images to answer the questions correctly.",
+                    "annotation_spans": [
+                        {
+                            "start": 182,
+                            "end": 197,
+                            "text": "TextVQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section these criteria are assessed empirically by using them to train Prior Networks on the artificial high-uncertainty 3-class dataset 4 introduced in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Overall, the results obtained for the datasets considered here demonstrate that the proposed CNC approach to cluster validation is extremely effective in favorable cases.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use four-fold cross validation to evaluate performance owing to the size of TREC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 91,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This compound selection criteria leads to better results for larger dataset sizes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ImageNet is a dataset of over 15 million labeled high-resolution images belonging to roughly 22,000 categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 8,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "V is a set of nodes containing all the unique concepts or terms derived from documents in the Intranet collection with a special node 'SHReC root' representing the root of the graph tree; is a weighting function that assigns to every pair of terms (x, y)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, we replicated the positive training instances four times to give 2,000 examples (but left the test set unchanged).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Lastly, our model attains better inception scores on both datasets, compared with the strong baseline (SN-GAN ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the USPS handwritten digits dataset with the task of distinguishing between '3' and '5' -this is the same experiment from .",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 42,
+                            "text": "USPS handwritten digits dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Wikipedia dataset contains 2,807,535 articles and the Web-I dataset contains 3,184,138 web documents.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 71,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On both datasets, we utilize AlexNet  as our basic CNN architecture and take the outputs of fc6 layer from AlexNet as the image representation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Current methods are trained/tested on specific datasets, but are not comprehensively justified in the complex real scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The linear model outperforms RNN without pre-training on Nottingham and JSB datasets, but shows problems with the Muse dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 84,
+                            "text": "Nottingham and JSB datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 126,
+                            "text": "Muse dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use four standard benchmark datasets to compare with previously published results: the SCOP dataset (7329 sequences with 2862 labeled)  for remote protein homology detection, the Ding-Dubchak dataset [2] (27 folds, 694 sequences)  for protein fold recognition, multiclass remote fold recognition dataset , and music genre data [1] (10 classes, 1000 sequences consisting of 13-dim.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 102,
+                            "text": "SCOP dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 182,
+                            "end": 202,
+                            "text": "Ding-Dubchak dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It includes 200 queries for the ClueWeb09 corpus and 100 queries for the ClueWeb12 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 48,
+                            "text": "ClueWeb09 corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 89,
+                            "text": "ClueWeb12 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the procedure of the Darmstadt dataset, we report PSNR and SSIM for each technique, on raw and sRGB outputs.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 48,
+                            "text": "Darmstadt dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, we use benchmark datasets to evaluate the practical performance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2995 documents in 7670 dimensions, 3 clusters), which represent datasets in different levels of size and difficulty of clustering .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The improvements on the WT10g collection are 19.63% and 26.38% over LM and RM, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 40,
+                            "text": "WT10g collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On four synthetic datasets, our PReNet and PRN are computationally very efficient, and achieve much better quantitative and qualitative deraining results in comparison with the stateof-the-art methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Again, we use the MIT CBCL Face Dataset #1, where 1000 face images and 2000 non-face images were used as a training set, and the rest of the dataset used as a test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 39,
+                            "text": "MIT CBCL Face Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The 20 Newsgroups data set 2 is a collection of newsgroup documents in 20 different topics.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 26,
+                            "text": "20 Newsgroups data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Such training set is composed of a collection of queries Q, where each query q \u2208 Q is associated with a set of assessed documents D = {d 0 , d 1 , . . .}.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to find a variety of high-quality meshes of frequently manipulated everyday objects, we selected models from the ShapeNet  dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 122,
+                            "end": 139,
+                            "text": "ShapeNet  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The AR database involves many variations such as illumination, expressions and sunglass and scarf occlusion, as illustrated in .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "AR database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the purpose of information retrieval, we are primarily interested in using the resulting hierarchical query representation to model the relationship between a query Q and a document D in the retrieval corpus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The BNC is a 100 million word collection of written and spoken language from a wide range of sources, designed to represent a wide cross-section of current British English .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 7,
+                            "text": "BNC",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For any gaze dataset, we cannot expect a researcher to collect i.i.d. data: which would mean spending effort into bringing in a participant and collect only one gaze sample.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our dataset, each head is annotated with a point at head center, and the bounding box of each head is also provided in test set to facilitate the evaluation of head detection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first four datasets are at http://www.csie.ntu.edu.tw/~cjlin/ libsvmtools/datasets/.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This poster describes the tasks as well as the document and topic collections that are to be used at the FIRE workshop.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Reliability diagrams of VGG-16 models trained with baseline, CI (ours) and VWCI (ours) losses in Tiny ImageNet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 118,
+                            "text": "Tiny ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus considering the number of evaluated training data points, the 2 nd order algorithm needs much fewer step than 1 st order gradient descent (see visualization in supplementary on MNIST).",
+                    "annotation_spans": [
+                        {
+                            "start": 182,
+                            "end": 187,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the average silhouette coefficient values computed from the original dataset are shown as solid circles in this plot, and the horizontal dashed lines represent the interpretation threshold values discussed above.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The learning part of the model involves estimating the parameters of the model from a collection of documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To be compatible, we followed the same procedure of running through random test sets 3 times to report the average results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Typically, training such a model requires fully annotated collections of aligned imagestylized-caption pairs (paired data) for each style.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the performance difference between these methods may not be visibly high in the RCV-MCAT dataset, the results are still statistically significant.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 105,
+                            "text": "RCV-MCAT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In terms of document representation, BOC demonstrates clear superiority over BOW for both BM25 and QLM ranking models, on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset in figure 2 and 4 respectively, using all consecutive frames.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation of the learnt policy on representative compiler benchmark suites (SPEC2006fp C/C++ , SPEC2017fp C/C++  and NAS benchmark suites ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The statistics of the datasets are summarized in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We surmise the failures on the calendar collection are due to the unusual combination of document length and document language.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 50,
+                            "text": "calendar collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider the CarEvolution dataset and compare the performances of our proposed strategy with two state of the art algorithms: the manifoldbased adaptation method in  and the low-rank SVM strategy presented in .",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 36,
+                            "text": "CarEvolution dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although AP produces better clustering accuracy on the VC data set, PIEC generates the correct cluster numbers for much more times.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 66,
+                            "text": "VC data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Detailed information about the TREC datasets and the evaluation criteria, please refer to http://trec.nist.gov.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 44,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental evaluation conducted on IMDb dataset shows that taking into account these social features in a textual model improves the quality of returned search results.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 49,
+                            "text": "IMDb dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we were able to get better performance on comparatively complex datasets such as CIFAR10, where the CapsNet in  did not show significant performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 101,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate different models on both synthetic (CLEVR ) and natural image (GQA [Hudson and Manning, 2019], CUB ) datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 53,
+                            "text": "CLEVR",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 107,
+                            "end": 110,
+                            "text": "CUB",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 75,
+                            "end": 78,
+                            "text": "GQA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reduction rate for connect-4 data set is smaller than that for the CovType data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 88,
+                            "text": "CovType data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The result is that a WT can represent a document collection using n(H0 (T ) + 1) + o(n(H0(T )",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first report the results of the proposed approach, and perform a comparison with the existing state-of-the-art semi-supervised learning methods on the MNIST, SVHN and CIFAR-10 benchmarks.",
+                    "annotation_spans": [
+                        {
+                            "start": 154,
+                            "end": 189,
+                            "text": "MNIST, SVHN and CIFAR-10 benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2000 training examples as a validation set to choose the margin penalty parameter; after this parameter by cross-validation, we then retrained each SVM using all the training exam reference, we also report the best results obtained previously from three layer deep belief ne 3) and SVMs with RBF kernels (SVM-RBF).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use a collection of 18,443 physics book records with 53 queries and relevance assessments (qrels) from the iSearch dataset 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 125,
+                            "text": "iSearch dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We carried out 5-fold cross validation on each of the five collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also see that SCST GANs using our Co-att discriminator  : Evolution of semantic scores over training epochs for COCO Test and OOC datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 141,
+                            "text": "COCO Test and OOC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, we conduct tests using a entailment checking problem; given the embedding of a formula f and the embedding of an assignment \u03c4 , predict whether \u03c4 satisfies f. Experiment Setup and Datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As an example, for the Gov2 collection, the effectiveness gain as a result of the global factor inclusion decreases from 6.2% for the QL baseline to 1.6% for the FD baseline.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 38,
+                            "text": "Gov2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the dataset provided by the CLEF 2017 eHealth Task 2.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast, our models are trained on the PERSONALITY-CAPTIONS dataset that has 215 traits and \u223c200,000 images.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 71,
+                            "text": "PERSONALITY-CAPTIONS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our empirical evaluation indicates that RIM outperforms existing methods on several real data sets, and demonstrates that RIM is an effective model selection method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the MLB data set, we removed stop words and treated punctuation and other non-alphabetic characters as separate tokens.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 20,
+                            "text": "MLB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we conduct extensive experiments on the datasets of Movielens and Yahoo!Rand.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the COCO dataset, the Inception Score increased from 21.85 to 26.21 and Rprecision increased from 52.55% to 74.52%.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 19,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this final part, we create additional images from the original NORB training set by applying global translations of 2, 4, and 6 pixels in eight directions (two horizontal, two vertical and four diagonal directions) to the original stereo-pair images 2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 83,
+                            "text": "NORB training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To save space, we may only report the experimental results for one of our proposed models on one of the datasets, because similar results are yielded by experiments for the other model or dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As we could compute Gaussian mixture models for the GEO approach only for the 767 most active users in the dataset due to data sparsity, the following results are reported for this subset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and it is still an open problem whether adversarial training can scale to large dataset such as ImageNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 104,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the PIE data set, we also performed data partitioning by the manual pose label and learnt boosting classifiers separately for each pose in AdaBoost method.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 20,
+                            "text": "PIE data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, the overhead of the UniGrid structure is not significant, when compared to the original data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Aggregated score distribution in the S100 dataset for all (d), for non-relevant (e), and for relevant (f) documents according to TREC judgments.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 49,
+                            "text": "S100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As demonstrated by the poor performance on MNIST, the CNN has not learned anything about digits or digit recognition, simply overfitting to the bias of the training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 48,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also present empirical results on the use of this method in phoneme alignment on the TIMIT corpus, where it achieves the best known results on this problem.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 100,
+                            "text": "TIMIT corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "[x (n,1) ,...,x (n,Bn) ] can be either an ordered and unordered list of transactions depending on data sets, where B n is the length of the n-th user's history.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Five-fold cross-validation has been adopted and a holdout set is used as a validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "But most gaze estimation datasets have repeated multiple measurements from a subject (see ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the PROTEINS dataset, the node kernel was a product of a Gaussian kernel with \u03bb = 1/d and a Dirac kernel on the continuous-and discrete-valued node attributes, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "PROTEINS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SWBD dataset, publically available through the MODIS repository as the MOD44W product, thus provides a label of land or water for every MODIS pixel at 500m for a single date, Feb 18, 2000.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "SWBD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "BookCrossing is a book rating dataset 4 , collected by  Setup & Evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 12,
+                            "text": "BookCrossing",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the most widely used data sets for evaluating the performance of zero-shot learning.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We denote the extended stylized corpus dataset as P \u2032 = {(\u0177 s , s)}, s \u2208 {s 0 , .., s k }.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The EUROPARL parallel corpus is a standard collection of text documents from the proceedings of the European Parliament  used for machine translation.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 28,
+                            "text": "EUROPARL parallel corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TDT4 was the benchmark corpus used in TDT2002 and TDT2003 evaluations.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 4,
+                            "text": "TDT4",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Taking MSVD dataset for example, OA-BTG obtains the average improvements of 3.25%, 1.15%, 5.45% on BLEU@4, METEOR, CIDEr scores, respectively, which indicates the effectiveness of bidirectional temporal graph.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 19,
+                            "text": "MSVD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated our approach on four standard datasets and compared its performance with the classification results provided by the datasets' creators, those estimated using the plug-in estimate \u03bb, and those obtained using the Fisher (FK) and TOP (TK) kernel  derived from the plug-ins.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "NNZ is the number of nonzero entries in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Three datasets are made available for the purpose of developing spelling alteration systems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "UCF-101-24 contains 3207 untrimmed videos from 24 sports classes, which is a subset of the UCF-101 dataset, with spatiotemporal annotations provided by .",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 106,
+                            "text": "UCF-101 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 10,
+                            "text": "UCF-101-24",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These three datasets are neuron images (Electron Microscopy images).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The evaluation is done by repeating the process to create a new completely independent test data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to better understand how these two features contribute to the final results, we plot them in  and  for both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the ShapeNet dataset , using 35763 shapes for training, 5133 for validation, and 10265 for testing, following the split used in prior work .",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 27,
+                            "text": "ShapeNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, ST-RSBP is also evaluated on feedforward spiking convolutional neural networks (spiking CNNs) with the MNIST dataset and achieves 99.62% accuracy, which is the best among all SNN BP rules.",
+                    "annotation_spans": [
+                        {
+                            "start": 116,
+                            "end": 129,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also note that hierarchical DBH generally performs better than regular DBH, except for the MNIST dataset where performance is comparable to regular DBH.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 107,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the FSupp Collection, queries were evaluated incrementally with one, two, three, five, and seven terms removed from their corresponding relevant documents.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "FSupp Collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In comparison with other action detection datasets, such as J-HMDB and UCF-Sports, the two benchmarks are much larger and more challenging, and more importantly, they are temporally untrimmed, which fits better to the spatio-temporal action detection task.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 66,
+                            "text": "J-HMDB",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 71,
+                            "end": 81,
+                            "text": "UCF-Sports",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "showed that a probabilistic model (with some hand-coded geometric knowledge) can recover clusters containing 20 known object class silhouettes from outlines in the LabelMe dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 164,
+                            "end": 179,
+                            "text": "LabelMe dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows ihe comparison results for SVM on the 6 benchmark datasets and we make the following observations: \u2022 MTFS and the proposed framework ESFS outperform",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By using relational databases, IR systems can benefit from future advances more rapidly, and \"for free\".",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The final model is evaluated on two large-scale datasets for skeleton-based action recognition tasks, i.e., NTU-RGBD and Skeleton-Kinetics, and exceeds state-of-the-art performance on both.",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 116,
+                            "text": "NTU-RGBD",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 121,
+                            "end": 138,
+                            "text": "Skeleton-Kinetics",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also trained another SimplE-ignr and SimplE models on this dataset, but without incorporating the rules into the embeddings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, we split the experiment data set into three subsets: STUDY (S), TRAIN (T), and EVALUATE (E).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, looking at Equation 9 in Korolova et al. , we have where D2 is a dataset with a single user added, versus D1, which is the same dataset",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2) Experimentally, we show that our formulation outperforms the state of the art by significant margins (10%-20%) on most available datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also apply our method on the very large Tiny Image data set of 80 million images , to qualitatively show some example retrieval results obtained by our proposed method.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 62,
+                            "text": "Tiny Image data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The interval to produce a new version is often related to the size of the document collection, and typically varies from hours to days.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our evaluation is performed with real datasets collected from three popular Web 2.0 applications: the video sharing sites YouTube and Ya-hooVideo, and the online radio station LastFM 2 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While development of an end-to-end data driven system isn't feasible just yet due to the missing speech signal, the new audio-visual scene aware dialog dataset at least permits to develop a holistic dialog management and sentence generation approach taking audio and video signals into account.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The image pair from PF-Pascal and Proposal Flow datasets is annotated with correspondences that could be used as ground truth for image matching performance evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 56,
+                            "text": "PF-Pascal and Proposal Flow datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this case, we tested the results for two data sets with uncertainty level u = 3",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on Public Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 25,
+                            "text": "Public Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An important theoretical question is, given a collection of n 2 fundamental matrices, whether these fundamental matrices are consistent, in the sense that there exist n camera matrices that produce these fundamental matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Left plot in  shows clustering error bars of different algorithms on the dataset as a function of \u03c1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "in \u0740, \u202b,\u0753\u202a\u0742\u123a\u202c\u0750\u202c \u202b\u0751\u202c\u123b is the term count in the user profile representation, and \u2206 is the document collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following Balog et al. , to define this probability, we assume that associations a(d, ex) between experts ex and documents d have been calculated and define where D is the set of documents in the collection, and a(d, ex) is simply defined as to be 1 if if the full name or email address of expert ex (exactly) appears in document d, otherwise a(d, ex)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After training, the model is utilized to predict attribute labels for the re-id dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In virtually every run on each data set in which RuleNet converged to a set of rules that completely covered the training set, the rules extracted were exactly the original rules used to generate the training set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Obviously, the computation of (S + \u03b3I) \u22121 is impractical for large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test collections are research tools that are most useful when they are reusable, that is, when they fairly evaluate retrieval systems that did not contribute to their construction .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this dataset the hand and object meshes are provided.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Betancourt  proposed a new metric that uses a transformed Hessian matrix of \u03c0(\u03b8), and Betancourt and Girolami  demonstrate the power of this method for efficiently sampling hyperparameters of hierarchical models on some simple benchmarks like Gaussian funnel.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A classic example of this type of similarity is the Cylinder-Bell-Funnel artificial data set, where there is noise around the underlying shape, but also noise in the index of where the underlying shape transitions.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 92,
+                            "text": "Cylinder-Bell-Funnel artificial data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the Bing dataset, using all user-specific and demographic features together, the MRR gains can be almost doubled to reach 9.42%.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 19,
+                            "text": "Bing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ATR dataset  aims to predict every pixel with 18 labels: face, sunglass, hat, scarf, hair, upper-clothes, leftarm, right-arm, belt, pants, left-leg, right-leg, skirt, leftshoe, right-shoe, bag and dress.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 11,
+                            "text": "ATR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It only reflects the intuition that in the process of data collection, the relative percentages of different classes are approximately in accordance with a prior distribution that is independent of domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These methods are not directly applicable for mining co-evolving spatial event sets since there is no explicit transaction concept in a spatio-temporal dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second plot in  demonstrates that the behaviour of LL+Exp* in newswire collections is similar to each other.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the performance of our method and the baseline methods on three popular datasets, DAVIS2016 , FBMS59  and SegTrackV2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 122,
+                            "text": "DAVIS2016 , FBMS59  and SegTrackV2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our approach with the blog datasets of the NYC and Disneyland, consisting of more than 20K blog posts with 140K associated images.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 73,
+                            "text": "blog datasets of the NYC and Disneyland",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, this table lists the number of clusters k * identified in the dataset, the associated quality measure",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The presence of distinctive and effective manual runs in the TREC-8 and MB2013 collections is obvious in the LOU test results.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 90,
+                            "text": "TREC-8 and MB2013 collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In general, other than participant specific random effects, we may even consider a separate \"site\" or \"dataset\" specific random effect.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compared these reconstructions with the clean data, X te and Y te , in terms of the signal-to-noise ratio: We present the recovery approach on synthetic data and the direct reconstruction approach on the face dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 207,
+                            "end": 219,
+                            "text": "face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly to the Wine Quality dataset, the attributes of the Wine dataset are the results of 13 chemical analysis tests on 178 wines.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 37,
+                            "text": "Wine Quality dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 61,
+                            "end": 73,
+                            "text": "Wine dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The home dataset contains 86 categories, e.g., sheets, furniture, pillows and cups.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "home dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike the two, the SVHN (S) dataset is a real-world Digits dataset of house numbers in Google street view images and contains 100k cropped Digits images.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 36,
+                            "text": "SVHN (S) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, image data is difficult to manually divide into the discrete domains required by adaptation algorithms, and the standard practice of equating datasets with domains is a weak proxy for all the real conditions that alter the statistics in complex ways (lighting,  pose, background, resolution, etc.)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly, for the portrait dataset we achieve an improvement of 30% over the baseline algorithms.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 35,
+                            "text": "portrait dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To conduct an experiment, we need a training dataset, a historical dataset and a test dataset derived from the original dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the results demonstrate that the proposed bias correction using a linear model on a small validation set is capable to correct the bias.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The right panels of figures 2 and 3 show the test set error rates from arc cosine kernel degree (n) and levels of recursion ( ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, we denote the number of missing points (or gaps) from the full grid to be LM \u00a1N. Two GP formulations are developed which enable fast training and inference on a dataset with this partial grid structure.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "cuses on natural language question-answering over selected RDF datasets, DBpedia and MusicBrainz .",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 96,
+                            "text": "DBpedia and MusicBrainz",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The audio dataset used in this task focuses on the complex street acoustic scenes that consist of different traffic levels and activities.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A special case arises in terms of the kNN based classifiers in the label-compatible scenario: during the actual classification step, regardless what metric is used, the kNN training data set can either consist of only task specific",
+                    "annotation_spans": [
+                        {
+                            "start": 169,
+                            "end": 190,
+                            "text": "kNN training data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The aspect prior can be estimated from the coverage of aspects in the document collection, if the latter is considered as a fair representative sample space for users' information need intents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are some publicly available Web collections built for research purposes.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 49,
+                            "text": "Web collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Real testing datasets We evaluated our method on three publicly available non-Lambertian photometric stereo datasets, namely the DiLiGenT benchmark , Gourd&Apple",
+                    "annotation_spans": [
+                        {
+                            "start": 129,
+                            "end": 147,
+                            "text": "DiLiGenT benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Yelp dataset 5 contains the user rating records for 22 categories of businesses on Yelp over ten years.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "Yelp dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, the method materializes the star neighbor relationships of a spatial dataset instead of finding all maximal clique relationships directly which is computationally expensive.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We build a large and reasonable dataset for analyzing user posting behavior on Twitter.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we compare our method with  on the YCB-Video dataset in terms of the 2D projection and the ADD(-S)",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 57,
+                            "text": "YCB-Video dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We measure the generalizability of our W-RPN proposals across different network architectures, weaklysupervised approaches, and datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the purpose of comparison, we used data sets from the UCI machine learning repository.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 89,
+                            "text": "UCI machine learning repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our proposed approach, IARM, is evaluated with two benchmark image datasets, namely, Ground Truth Database of University of Washington (UW) [5] and the real-world MIRFlickr .",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 106,
+                            "text": "Ground Truth Database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 163,
+                            "end": 172,
+                            "text": "MIRFlickr",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the experiments, we firstly train the object learners with all the images in COCO training set including the eight novel objects, and the LSTM is pre-trained with all the sentences from CO-CO training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 97,
+                            "text": "COCO training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 189,
+                            "end": 207,
+                            "text": "CO-CO training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Notice, that these systems use all the available views (8 cameras in our dataset) as input, whereas our framework relies on a single RGBD view.   .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As additional demonstration of this dependence on the specific binary task, we compare the preference surfaces for distinguishing different classes of the UCI datasets 'Letter Recognition' and 'Optical Recognition of Handwritten Digits.'",
+                    "annotation_spans": [
+                        {
+                            "start": 169,
+                            "end": 187,
+                            "text": "Letter Recognition",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 194,
+                            "end": 235,
+                            "text": "Optical Recognition of Handwritten Digits",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There is a recently published dataset for evaluating the retrieval process in systematic reviews .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, most studies that have dimension-wise disentanglement use simple datasets such as celebA and chairs.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 101,
+                            "text": "celebA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MNIST dataset contains 60,000 train and 20,000 test images of ten digits with the classification tasks of determining which digit an image corresponds to.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 13,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The size of the document collections we use is actually no insulation against statistical issues, since measurements for individual topics typically depend on the small sets of documents judged relevant.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In between FCVID and ImageNet datasets, there are 34 categories which share common definitions and thus we build the subset of FCVID and ImageNet, named FCVID-S and ImageNet-F, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 38,
+                            "text": "FCVID and ImageNet datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 127,
+                            "end": 132,
+                            "text": "FCVID",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 137,
+                            "end": 145,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We adapt the real-time chat interface from  to pair two AMT workers to have an English-language conversation about a video from the Charades Dataset ( ).",
+                    "annotation_spans": [
+                        {
+                            "start": 132,
+                            "end": 148,
+                            "text": "Charades Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ALLAML data set contains in total 72 samples in two classes, ALL and AML, which contain 47 and 25 samples, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 15,
+                            "text": "ALLAML data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the dataset, we create instances of the problems we consider as follows.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All the data sets use Gov2 collection as the document set, and the collection consists of 25.2 million web pages crawled from the .gov domain.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 37,
+                            "text": "Gov2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, as of the date of paper submission, on KITTI and MPI Sintel benchmarks, our method achieves the best performance among published deep unsupervised optical flow methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 84,
+                            "text": "KITTI and MPI Sintel benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The sequences in the database were selected from the Astral database, based on the E-value threshold of 10 \u221225 for removing similar sequences from it.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 68,
+                            "text": "Astral database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When using no classifiers, we obtained the best R 1 performance with t tol = 5 (out of t tol \u2208 {2, 5, 20}) on the newsgroup and Industry data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 114,
+                            "end": 146,
+                            "text": "newsgroup and Industry data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "H1 has been studied in our initial analysis of YouTube dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 62,
+                            "text": "YouTube dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset also consists of paired samples synthesized from 240 fps videos.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each N , 20 synthetic datasets are generated and the plots are made by averaging the results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in many real-world multi-label datasets, the label assignments for training instances can be incomplete.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To retrieve the crosslingual related queries, a built-in-house French-English bilingual lexicon (containing 120,000 unique entries) and the Europarl corpus are used.",
+                    "annotation_spans": [
+                        {
+                            "start": 140,
+                            "end": 155,
+                            "text": "Europarl corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 63,
+                            "end": 95,
+                            "text": "French-English bilingual lexicon",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Qualitative results  visualizes the road extraction results of different methods in different testing areas of Beijing and Shanghai, trained using Beijing dataset only.",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 162,
+                            "text": "Beijing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There may also be some intrinsic advantages over supervised training on a fixed data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental evaluation on real-world datasets is presented in Section 4 with discussions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A fourth portion is used as validation set V, which, in turn, is used to \"learn\" the solutions (i.e., to compute the Fitness function in the GP evolutionary process, and to learn vector W in RankSVM) and to tune parameters of all recommendation methods (including the RankSVM based strategy, using cross-validation in V).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To begin with, and to provide a comparison to other state of the art methods, we evaluate the performance of the proposed algorithm for solving Equation (5) against state of the art batch algorithms for CSC: the SBDL algorithm , the algorithm in  and the algorithm presented in , all using the same settings on the Fruit dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 315,
+                            "end": 328,
+                            "text": "Fruit dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use two classes, Watercolor and Pen Ink, of BAM  dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 59,
+                            "text": "BAM  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The underlying dataset used in the testset is the Billion Triple Challenge 2009 dataset which consists of 1.3 billions RDF triples crawled from different domains of the LOD cloud.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 87,
+                            "text": "Billion Triple Challenge 2009 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One of the main objectives of our demonstration is to share the unique user experience of online search powered by neural networks on a large collection of high-quality images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although YouTube-VOS and DAVIS benchmarks can be used for training and evaluating the models without using the annotations given at the first frame, both benchmarks have the limitation that not all objects appearing in the video are annotated.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 41,
+                            "text": "YouTube-VOS and DAVIS benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments, we demonstrate the quantitative and qualitative effectiveness of the latent SMM on standard BoW text datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 129,
+                            "text": "BoW text datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then the anomaly score for each variable is computed by evaluating the distances between the fitted conditional distributions within the Markov blanket for that variable, for the (two) data sets to be compared.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the performances obtained by our model seem to suggest that it can greatly benefit from term dependencies, on a variety of collections, even when a small window size is used.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report the result on NYU  testing set and plot the percentage of frames in which all joints are below a threshold.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 40,
+                            "text": "NYU  testing set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our final experiment applies the eigenfunction approach to the whole of the Tiny Images dataset (79,302,017 images).",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 95,
+                            "text": "Tiny Images dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A detailed description of our dataset is provided in Section 5.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides, we downloaded 1,000,000 social images from Flickr to form a background class and combine it with the second database to form a large scale data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These datasets are fully calibrated and contain images captured by a forward-looking camera mounted on a vehicle driving through a city.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The running times of our algorithm on all three datasets are given in",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "User generated categories can be found from various source domains: Twitter list, Flickr collection and set, Del.icio.us hierarchy and Wikipedia or News categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 107,
+                            "text": "Flickr collection and set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We now describe our general framework for indexing and querying versioned document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The datasets used for the experiments in this paper are briefly introduced below: ICDAR 2015.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 92,
+                            "text": "ICDAR 2015",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe how we used a data set of chorale harmonisations composed by Johann Sebastian Bach to train Hidden Markov Models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Online Consensus Maximization on Complete Datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to the increasing interest, there have also been numerous attempts for building test collections for sentiment analysis .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Regarding those orientation agnostic images in the dataset, automatically assigned rotation labels usually contain noise, which naturally leads to a positive unlabeled learning problem.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This may lead to an end-to-end framework without the need of costly collections of ground truth data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments (both on standard collections, such as TREC, and on Web-like repertoires) show that the use of virtual regions is beneficial for retrieval effectiveness.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 59,
+                            "text": "TREC",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This process will be repeated in rounds until there is no remaining target data for adding to the training dataset, or enough target data have been incorporated.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It should be noted that V T is restricted to be a collection of K normalized rows of A, while there is no constraint on U .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also applied our method to MNIST, the standard handwritten digits data set, and Nursery, one of the larger UCI data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 35,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 110,
+                            "end": 123,
+                            "text": "UCI data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 83,
+                            "end": 90,
+                            "text": "Nursery",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i \u2208 I, defined as follows: D i is thus built only from those transaction t in the original dataset that contain i by removing all the items preceding i according to the total order",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is not because restaurant is a more frequent keyword in the Twitter dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 80,
+                            "text": "Twitter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The differences in physical metrics between the two datasets can be attributed to the higher reconstruction accuracy for ObMan but also to the noisy object ground truth in FHB C which produces penetrated and likely unstable 'ground truth' grasps.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We now show how the Recursive Neural Tensor Network (RNTN) from  can be adapted using pseudo-ensembles, and evaluate it on the Stanford Sentiment Treebank (STB) task.",
+                    "annotation_spans": [
+                        {
+                            "start": 127,
+                            "end": 165,
+                            "text": "Stanford Sentiment Treebank (STB) task",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The real data experiment is conducted on 6 public benchmark feature selection datasets including one object image dataset, i.e., COIL100 , one hand written digit image dataset USPS ,",
+                    "annotation_spans": [
+                        {
+                            "start": 129,
+                            "end": 136,
+                            "text": "COIL100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 176,
+                            "end": 180,
+                            "text": "USPS",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show the poor performance of the CosSim and Duet-distributed models on the four target topics when trained on the remaining collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is crucial because, as opposed to general image recognition databases such as MS-COCO and ImageNet, where better prediction of large objects is usually achieved by using deeper layers to improve viewpoint-and scale-invariance, the tissue structures to be recognized in our Atlas database seem to be small viewpoint-and scale-invariant textures, so using deeper layers will be redundant and may even promote overfitting.",
+                    "annotation_spans": [
+                        {
+                            "start": 278,
+                            "end": 292,
+                            "text": "Atlas database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 83,
+                            "end": 90,
+                            "text": "MS-COCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 95,
+                            "end": 103,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our results on the MS-COCO dataset demonstrate that the Transformer does indeed benefit from incorporating spatial relationship information, most evidently when comparing the relevant sub-metrics of the SPICE captioning metric.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 34,
+                            "text": "MS-COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We spilt the dataset into 6,079 training, 1,000 validation and 1,000 testing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For that purpose, the remaining 20 query examples (5 for each class) from the NTCIR-TQIC training data set are used.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 106,
+                            "text": "NTCIR-TQIC training data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent developments in deep learning  have influenced the design of re-id algorithms as well, with deep re-id algorithms achieving impressive performance on challenging datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(a) displays the true rank of the first point returned by LSH and predictive indexing on the MNIST data set as a function of \u03b1, averaged over all points in the test set and over multiple trials.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 107,
+                            "text": "MNIST data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset consists of tweets originating from Egypt in the time period from January through March 2011.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To study the effectiveness of the strategies proposed in the previous section, we evaluate the QPRP in the context of subtopic retrieval, employing the TREC 678 interactive collection with the subtopics judgements described in , and the recent ClueWeb collection (part B only), along with the topics defined for the Web diversity track.",
+                    "annotation_spans": [
+                        {
+                            "start": 244,
+                            "end": 262,
+                            "text": "ClueWeb collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 152,
+                            "end": 183,
+                            "text": "TREC 678 interactive collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The LSP dataset and its extended training set contain 11k training images and 1k testing images from sports activities.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "LSP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on benchmark datasets demonstrate that LO-Net outperforms existing learning based approaches and has similar accuracy with the state-of-the-art geometry-based approach, LOAM.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For sparse datasets, like Abstracts, and the two BMS datasets, neither Slim nor Krimp can identify much structure, whereas Slim can describe Pumsb(star)",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 61,
+                            "text": "BMS datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 141,
+                            "end": 152,
+                            "text": "Pumsb(star)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For translating English documents and queries, we used the Penn Treebank tokenizer to preprocess them 3 .",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 72,
+                            "text": "Penn Treebank",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further use the associated semantic representations to greatly reduce biases within the dataset and control for its question type composition, downsampling it to create a 1.7M balanced dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the PoliticalBook data set, we use the testing procedure of the latent Wishart process (LWP) model  for evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 30,
+                            "text": "PoliticalBook data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Last, we conduct an extensive empirical evaluation on several real data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Because we exclude direct re-tweets from our data set, the majority of the Twitter profiles we collect contain between 3000 and 3200 tweets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the following subsections, we perform the semantic segmentation and surface normal prediction on NYU v2 dataset , and the age and gender classification on the IMDB-WIKI dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 179,
+                            "text": "IMDB-WIKI dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 100,
+                            "end": 114,
+                            "text": "NYU v2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For  the SIFT data set (using the same parameters), using twice more queries, the solution improves from 1.4 to 1.16, and with three times more, it improves to 1.04, and with four times more, it improves to 1.05.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 22,
+                            "text": "SIFT data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on a real-world dataset from Instagram, the social media platform on which the highest percentage of users have reported experiencing cyberbullying, reveal that the proposed architecture outperforms the state-of-the-art method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Empirical results on the HEPT dataset under the WIC model are reported in , to compare the performance of IMRank with different initial rankings, as well as the performance of those rankings alone.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 37,
+                            "text": "HEPT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Heterogeneity is described in terms of the number of different agents that appear in the overall dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "from a collection of probability distributions P = {P 1 , P 2 , . . . }, respectively, where distribution P i+1 may depend upon the results of experiments 1, 2 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate how our proposed approach compares to other baselines, we use Task 2 of the Document Understanding Conference dataset (DUC 2001) which is designated for multidocument summarization.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 129,
+                            "text": "Document Understanding Conference dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However similar phenomena can be observed on the other data set as well.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, patches with low gradient magnitudes are discarded from the training dataset, which provides better naturalness implicitly.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Formally, sub-structure mining is to find the complete set, but no-duplication, of all significant (generally frequent) sub-structures from dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the graph derived from the DBLP data set, there are two types of nodes: author and conference.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 43,
+                            "text": "DBLP data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can only use this corpus since the amount of event questions was too small in previous TREC collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 106,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the dataset only provides the ground truth point clouds, we follow MVSNet  to generate the rendered depth maps for training.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Alternatively, when including observation noise in the model, computational tractability has been limited to smaller datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our first experiment, we computed simplex traces using 3 selected columns on a synthetic data sets of moving Gaussians, see .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then the function F can be learned in a large-margin framework through the training set {(x (i) , y (i) )} T i=1 by minimizing the objective function: such that for all i and all y \u2208",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the datasets ICDAR2013,  achieves the best accuracy but it requires character-level bounding box annotations.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 26,
+                            "text": "ICDAR2013",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the evaluation of our denoising algorithm as a preprocessing step for SSL, we used the benchmark data sets from .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Abstracts dataset contains the abstracts of all accepted papers at the ICDM conference up to 2007, where words are stemmed and stop words removed .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "Abstracts dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unfortunately, no query-level features are available in public LTR datasets (Section 6.1), thus we use zero initialization for the hidden state:",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 75,
+                            "text": "LTR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the three simulated datasets WL-PRID 2011, WL-iLIDS-VID and WL-MARS, the raw videos of these datasets are unavailable, so we formed the simulated datasets as follows.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 45,
+                            "text": "WL-PRID 2011",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 47,
+                            "end": 59,
+                            "text": "WL-iLIDS-VID",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 64,
+                            "end": 71,
+                            "text": "WL-MARS",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection of inputs having the same face allocation jointly across the K units constitutes the r th partition cell (region) of the layer PD.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Forest Cover Type dataset from UCI KDD Archive contains data on different forest cover types, containing 581,012 records.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 29,
+                            "text": "Forest Cover Type dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data collection process last for two months, May and June 2013.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since many objects in the datasets are symmetric, we use the symmetric version of these two metrics and report their REP-5px and ADD-0.1d values.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the sample data set {I(t), V (t)} employed in learning is large enough, the spike occurrence can be predicted by estimating an empirical probability of a spike being generated at the time t, given a time-dependent orbit of an estimated output, {v(t)}, as In a practical experiment, however, the amount of collectable data is insufficient for estimating the spiking probability with respect to any orbit of v(t).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike Slim, both postprocess materialised collections of candidate patterns, and partition the data instead of summarising it.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SogouT-16 provides several related datasets, such as link structure, word embeddings, and query log of NTCIR-WWW topic set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the NTIRE 2017 Challenge on Single Image Super Resolution, a high-quality dataset DIV2K  is newly released.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 90,
+                            "text": "DIV2K",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the rcv datasets, the BB-tree range search algorithm is an order of magnitude faster than brute search except of the the two datasets of highest dimensionality.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 19,
+                            "text": "rcv datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, for the neighborhood kernel computed on the unlabeled subset ( 4000 sequences) of the SCOP dataset, using abstraction/ clustering-based kernel (BLOSUM) achieves the mean ROC50 70.14 compared to ROC50 67.91 using the standard mismatch string kernel.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 111,
+                            "text": "SCOP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, DNS is expected to lead larger improvements on the datasets which are more long tailed 3 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the current data sets in the community are either of small scale or do not mimic the real-world testing data sufficiently well, which limits the potential of models trained on these datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar trends are also likely to be observed on the larger NIPS2012 corpus if we allow the values of \u03b7 to be even smaller than 0.001.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 75,
+                            "text": "NIPS2012 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have run a second batch of experiments in which we randomly split the Reuters-21578 test set in 10 equallysized parts (about 330 documents each), we run each ranking method on each such part individually, and we average the results across the 10 parts.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 95,
+                            "text": "Reuters-21578 test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MG4J is faster than Lucene in all but Corpus C. BitFunnel's overall performance relative to PEF improves as document lengths increase.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset along with further information are available at visualreasoning.net.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Partially due to these conceptual and practical constraints, datasets for visual recognition are not deliberately collected with clearly identifiable domains .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unfortunately, the collection of frequent itemsets extracted from a dataset is often very large.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This collection is stemmed using the Krovetz stemmer .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the validation set is only used for tuning model parameters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate AROW on both synthetic and real data, including several popular datasets for document classification and optical character recognition (OCR).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These methods first assume that the data points (nearly) reside on a low-dimensional manifold (which is called manifold assumption in ), and then try to discover such manifold by preserving some local structure of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After that, we fix the hyper parameters and the validation set are added to the original training set, then we retrain the model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For qualitative testing on scanned data, we used the NYU Depth V2 dataset  and the recent ScanNet dataset , which include RGB-D images of indoor scenes.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 105,
+                            "text": "ScanNet dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 53,
+                            "end": 73,
+                            "text": "NYU Depth V2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the evaluation protocol of the PASCAL VOC dataset, we report detection results on the PASCAL test set using detection average precision.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 59,
+                            "text": "PASCAL VOC dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 111,
+                            "text": "PASCAL test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the performance of our annotators given CONLL training datasets  and the word embeddings as features.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 69,
+                            "text": "CONLL training datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, none of these collections can be used for sentiment change and sentiment spikes' trigger detection because most of them span over a short period of time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results demonstrate that the Hadamard transform can indeed outperform shuffling in terms of accuracy on large scale datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, it would be interesting to examine the possibility of learning the number of clusters from data based on Dirichlet process mixture models, or to extend our probabilistic framework for discriminative motif discovery.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In most of these applications, there will be gaps in the training dataset which may be caused by missing observations, presence of obstructions or irregular domain boundaries, or data corruption .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Key to the effectiveness of such a method is that the dictionary be a representative sample of a collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To illustrate the convergence of our TPN, we visualize the evolution of the embedded representation of a subset on VisDA 2017 dataset (10k samples for each domain) with t-SNE during training.",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 133,
+                            "text": "VisDA 2017 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unfortunately, there are few available real-world datasets for evaluating causal discovery algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The sizes of these datasets range from small scale (thousands samples) to mid-large scale (half a million samples).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our model using two public multimodal sentiment datasets: MVSA-Single and MVSA-Multi .",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 81,
+                            "text": "MVSA-Single",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 96,
+                            "text": "MVSA-Multi",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A set of ingredients is a variable sized, unordered collection of unique meal constituents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(2) Compared with rational polynomial filters, DFNet improves upon CayleyNet and ARMA 1 by 3.3% and 1.8% on the Cora dataset, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 124,
+                            "text": "Cora dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly to the Quote dataset, central nodes in the twitter dataset corresponds to users that are on many information-propagation paths.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 30,
+                            "text": "Quote dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Enron dataset with 0.5M records, the method with pruning can reduce the time from 30 ms to 17 ms.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 21,
+                            "text": "Enron dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This led to a more conservative choice of a learning rate for the Image Aesthetics dataset (10 \u22124 ), and a larger setting for the Adience dataset (10 \u22123 ).",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 90,
+                            "text": "Aesthetics dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 130,
+                            "end": 145,
+                            "text": "Adience dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data sets D500, D5000, D10000, D20000, D100000 were used in NIPS 2003 challenge on feature selection .",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 18,
+                            "text": "D500",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 20,
+                            "end": 25,
+                            "text": "D5000",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 27,
+                            "end": 33,
+                            "text": "D10000",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 35,
+                            "end": 41,
+                            "text": "D20000",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 43,
+                            "end": 50,
+                            "text": "D100000",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When combined, the above improvements lead to an execution time of less than 1 hour for a dataset with 10 8 comparisons; the na\u00efve greedy algorithm on the same dataset would require more than 10 days to terminate.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset includes multiple statistical language models created from text resources on the web, including queries received by Bing over a 10 month period.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Additionally, two image salient object segmentation datasets, DUT-O  and PASCAL-S , offer both static gaze data and segmentation annotations, and are thus also used in our training phase, resulting in totally 6,018 static training examples.",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 67,
+                            "text": "DUT-O",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 81,
+                            "text": "PASCAL-S",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, we first sample the dataset, and then use k-Space to compute the mapping S \u00b7 R",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a dataset with known ground truth, generate n \u03c0 partitions with different properties by varying the parameters of one or more clustering methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SASI-Amazon (SASI-AM) dataset comprises of 180 sentences from Amazon product reviews annotated by three annotators as sarcastic or non-sarcastic.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 33,
+                            "text": "SASI-Amazon (SASI-AM) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On standard benchmarks, GI estimates illumination more accurately than state-of-the-art learning-free methods in about 0.4 seconds.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, it is also difficult to train Prior Networks using this criterion on complex datasets with many classes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The inputs are the query Q, the dataset N, the user confidence user_conf (or tolerance for false dismissals), and the set of standard deviations StdDev obtained from BuildErrorDistribution in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "N-gram usually requires more computational effort than Stopword since Stopword has a fixed number of features for all datasets while N-gram needs to be generated separately and usually contains thousands of features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the standard evaluation protocol , we report results on the first split of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Search accuracy is the percentage of optimal prototypes found on the full test set of 1320 patterns in a single category (solid lines).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "; see http://www.siam.org/journals/ojsa.php for the original dataset and their associated z scores as described here gives better results than the use of either Q values or z scores alone.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These two factors separate CQA collections from that of the task defined by the collection proposed in this paper.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 42,
+                            "text": "CQA collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method on the KITTI 2012 and 2015 datasets, in which there are 194 and 200 pairs of flow and stereo with high quality annotations, covering 28 scenes of the KITTI raw dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 58,
+                            "text": "KITTI 2012 and 2015 datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 173,
+                            "end": 190,
+                            "text": "KITTI raw dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Other infamous datasets for action recognition do not meet the properties of complex actions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation Measures and Baselines Results using NUS-WIDE Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 64,
+                            "text": "NUS-WIDE Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, it is also interesting to note that the performance of TF-PRF first increases with the increase of |D f | (this is especially obvious on the disk4&5, WT2G and GOV2 collections), and then after the peak point it stays relatively stable on all collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 154,
+                            "end": 188,
+                            "text": "disk4&5, WT2G and GOV2 collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the object detection baseline, we train a separate object detector for each task on our train set and infer on the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Deletion of a vector from the data set can be done in constant time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, it achieves the best performance on all the six datasets evaluated by P rec@10, and on all but the LiveJournal dataset evaluated by AUC.",
+                    "annotation_spans": [
+                        {
+                            "start": 114,
+                            "end": 133,
+                            "text": "LiveJournal dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We measure the performance on the DUC 2007 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 50,
+                            "text": "DUC 2007 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At warming-up step, we train DR branch using synthetic dataset  only for 10 epochs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Bayes error and the misclassification rate of a QDA classifier with 5-fold cross validation applied to the Iris dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 119,
+                            "text": "Iris dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset similarity is then used to judiciously select significant subgraphs from similar (related) datasets to the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, both the ArnetMiner as well as the W3C test collections encompass candidate experts from a single knowledge area, namely, computer science and web standards, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 38,
+                            "text": "ArnetMiner",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 54,
+                            "end": 74,
+                            "text": "W3C test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are two evaluation protocols for this dataset: Cross-Subject (CS) and Cross-View (CV) .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, consider a hypothetical data set containing articles by each of two authors such that half of the articles authored by each author discusses one topic, and the other half discusses another topic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results for the DBLP and Citation data sets are illustrated in Figures 6(a) and (d) respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 47,
+                            "text": "DBLP and Citation data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We ran five publicly available search systems, in a total of seventeen different configurations, against nine TREC adhoc-style collections, spanning 1994 to 2005.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate D 3 TW on two weakly supervised tasks in two popular benchmark datasets, the Breakfast Action  and the Hollywood Extended .",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 105,
+                            "text": "Breakfast Action",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 115,
+                            "end": 133,
+                            "text": "Hollywood Extended",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, these methods show weaknesses in other adversarial settings such as being unable to handle larger perturbations, unable to simultaneously handle many different resolutions, and not scalable to large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To enable regularized parametric censored regression to handle high-dimensional censored datasets, in this paper, we propose the \"URPCR\" model, which stands for \"Unified model for Regularized Parametric Censored Regression\".",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For RankSVM, we tested two kernel functions, Linear and Radial Basis (RBF), choosing the former as the latter did not scale to our data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, existing SBIR datasets suffer in both volume and variety, leaving only less than thousand of sketches per category, with maximum number of classes limited to few hundreds.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 35,
+                            "text": "SBIR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The linear predictor Z = \u039bX learned from a position-normalized training dataset gives exactly the position-unbiased CTR estimation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can see from  that our model performed best when n = 2, and there were statistical differences (McNemar's test; p < .001) between n = 1 and n = 2 on NPB and  MLB data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 152,
+                            "end": 174,
+                            "text": "NPB and  MLB data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, most prior work in resource selection has studied corpus-based evidence derived from the target collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated LesaNet on the public DeepLesion dataset, which contains over 32K diverse lesion images.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 53,
+                            "text": "DeepLesion dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This behavior is demonstrated on dataset 0095, which does contain a sufficiently large amount of lines in the building facades.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The challenge is to devise a partitioning which creates as few subproblems as possible from the original dataset, and that, at the same time, allows a fast merging of the local results in order to get the actual solution of the mining task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Together with SogouT-16, we also release several related datasets to accelerate the pace of researches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the pupil dataset, we reduced the size of the network by removing the five central layers, we used a batch size of 64, and stochastic gradient descent with a learning rate of 10 \u22123 and momentum of 0.9.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although no document of the training set describes this incident (because the text collection is ordered by time and the attack took place \"after\" the split into train and test set), the HSOM generalizes well and maps the semantic content of these documents to the proper area of the map, located between the regions for crude and ship.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is about 1.4 GB large, and is available from the FIMI repository.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 76,
+                            "text": "FIMI repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Most of them rely on extracting evidences from textual descriptions of the content, treating the text corpus as a set of objects with associated attributes, and applying some classification method to detect spam .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on two benchmark datasets show that the proposed emotion-aware saliency model outperforms other state-of-the-art methods, especially in terms of predicting the relative importance of salient regions within an image (see ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To the best of our knowledge neither curvature nor self-similarity was used to perform object detection on a dataset of similar complexity as the PASCAL dataset so far.",
+                    "annotation_spans": [
+                        {
+                            "start": 146,
+                            "end": 160,
+                            "text": "PASCAL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is reported to have achieved the best result (F1-Measure of 0.908) on the CoNLL 2003 test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 96,
+                            "text": "CoNLL 2003 test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that even the largest meta data size chosen, about 97 MB for our 10% data set, would result in a meta data size of slightly less than 1 GB on the complete Wikipedia data of 2.4 million articles and over 80 million versions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The datasets are captured using DAVIS camera, and have many series of scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\u00d7 16 pixels extracted at random positions from random images of the van Hateren natural image database .",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 102,
+                            "text": "van Hateren natural image database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These scores are competi-tive with published reports of the performance of nearestneighbor based systems on the same dataset (see ), which makes the subspace approach appealing because of its lower overhead for storage, updates, and predictions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train an LSTM language model , which is widely used for text generation from images, as a baseline method with the same dataset as the proposed method ( ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ICDAR 2015 dataset  is collected for the ICDAR 2015 Robust Reading Competition, with 1000 natural images for training and 500 for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 22,
+                            "text": "ICDAR 2015 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the dog datasets, we also tried using the local binary pattern KDES to learn templates instead of the edge KDES due to the relative consistent textures in dog images.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 20,
+                            "text": "dog datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The implementation of specific models in the TREC dataset is based on our task classification ( .",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 57,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used email messages from two separate corpora: the TREC 2005 Public Spam Corpus (trec05p-1) , and the CEAS 2008 Live Challenge private corpus .",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 144,
+                            "text": "CEAS 2008 Live Challenge private corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 54,
+                            "end": 94,
+                            "text": "TREC 2005 Public Spam Corpus (trec05p-1)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 144,
+                            "text": "CEAS 2008 Live Challenge private corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We aim at a learner that can imitate human experts' policy for camera planning by \"watching\" a large collection of professional videos from experts.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The computation of g n,\u03c4 involves the n\u03c4 input observations with largest norm, and the minimization is performed over a collection of classifiers of finite VC dimension.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "BC is a major kernel of the HPCS Scalable Synthetic Compact Applications graph analysis benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 97,
+                            "text": "HPCS Scalable Synthetic Compact Applications graph analysis benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Independence from collection statistics is beneficial when access to the entire data set is prohibited in situations such as peer-to-peer network environments , and meta-search engines.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are two common settings for evaluating amodal segmentation: the class-agnostic setting, e.g., the COCOA dataset evaluation in  and the class-specific setting, e.g., the COCOA-cls and D2S evaluation in .",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 117,
+                            "text": "COCOA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Adult dataset is an example where all methods perform nearly identically; the surrogate loss used in practice seems unimportant.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "Adult dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results on two different QA datasets: Answerbag and Jeopardy!",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 51,
+                            "text": "Answerbag",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 56,
+                            "end": 64,
+                            "text": "Jeopardy",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The discharge summaries used for the task originate from the anonymized clinical free-text notes of the MIMIC II database, version 2.5  .",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 134,
+                            "text": "MIMIC II database, version 2.5",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It obtains better performance than many state-of-theart networks in the ImageNet-1K classification benchmark and MS-COCO object detection and segmentation benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 164,
+                            "text": "MS-COCO object detection and segmentation benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "muSSP is proved to provide exact optimal solution and we demonstrated its efficiency through 40 experiments on five MOT datasets with various object detection results and a number of graph designs.",
+                    "annotation_spans": [
+                        {
+                            "start": 116,
+                            "end": 128,
+                            "text": "MOT datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence, one approach to selecting the number of partitions k in a dataset is to maximize Q(P) with respect to k.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The primary challenge is that because all past versions are retained, a versioned document collection is much larger than a collection that only keeps the latest version.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To this end, we annotated 3 datasets and conducted several experiments to unveil the secrets of deep saliency models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The test set contained   shows the distribution of the probabilities when completing one complete-able and one uncomplete-able triplet in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "20 independent datasets were randomly generated for each same-parameter setting, and we evaluated the average performances among the 20 sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test collections play a very important role in empirical study.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The whole dataset is divided for development and evaluation, and each audio is 3-5 minutes long.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To test over-fitting, we plotted histogram of energies for CIFAR-10 train and test dataset in  and note almost identical curves.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 90,
+                            "text": "CIFAR-10 train and test dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The classification results on Scene data set are given in .",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 44,
+                            "text": "Scene data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A simple way to compute the mutual information is to consider the whole collection; but, this choice may not be ideal for ambiguous terms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the PASCAL-Context dataset, compared with the previous state-of-the-art results, the proposed method achieves improvements of 2.1% and 0.9% in terms of the pixel-wise accuracy and mean IoU, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 29,
+                            "text": "PASCAL-Context dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since previous methods are evaluated by estimating the absolute 3D depth of 3D hand joints, we follow them by finding an approximate hand scale using a single frame in the dataset, and fix the scale during the evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A Bayesian model is then proposed to estimate the likelihood that a candidate subgraph is significant by summarizing the weighted \"votes\" from the source datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The main data set used in the ambiguity study is described next followed by an analysis of it.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the more challenging finegrained SUN and CUB datasets, we are able to improve the results by 7 and 2 points.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 57,
+                            "text": "SUN and CUB datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Datasets Model selection Implementation Comparison to published results on Hamming Loss Results on F \u03b2 Conclusion and Future Work.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To convert the number of distances to actual retrieval time, one simply has to divide the number of distances by 890 distances/sec for UNIPEN, 15 distances/sec for MNIST, and 715 distances/sec for the hands data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 164,
+                            "end": 169,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 135,
+                            "end": 141,
+                            "text": "UNIPEN",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the TREC-8 collection, our experiments showed that CCA leads to significant improvements in retrieval effectiveness.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 27,
+                            "text": "TREC-8 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The algorithm outperformed other local and global methods on challenging synthetic and real datasets, finding the global optimum reliably.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We believe that with even larger datasets the Bayesian Sets algorithm will be a very useful tool for many application areas.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data set records the ECG data of abnormal people and normal people.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given an image dataset, we construct a visual graph and a textual graph.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Is it due to the network being fine-tuned to a new task (saliency prediction) or is it the network being fine-tuned to a different set of data (images from a saliency prediction dataset)?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Yahoo data sets include 11 top-level categories such as Computers, Education, Health, Recreation, and Science etc.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "Yahoo data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since no side information is available for the items in the Flixster or Epinion dataset, K V is assumed diagonal:",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 87,
+                            "text": "Flixster or Epinion dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Along the same lines, one may consider that terms that are very common, such as \"video\" in a YouTube object collection, are too general, whereas very rare terms may be too specific or may represent noise (e.g., misspellings, neologisms and unknown words).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can see from this figure that the predicted attention maps of AG-CNN are close to those of GT, when testing on our LAG database.",
+                    "annotation_spans": [
+                        {
+                            "start": 118,
+                            "end": 130,
+                            "text": "LAG database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One objective of this work is to introduce eye movement recordings for the PASCAL VOC image dataset used for action recognition.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 99,
+                            "text": "PASCAL VOC image dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They found that a rank-14 basis best predicted the test set as measured by average absolute error (MAE) and meansquared error.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparing the CPU time and memory of the ranking algorithms for the given query and web page on MSN data sion+ on real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since we need ground truth data to compute desired PSNR values, we decided to use a simulated dataset for our second set of experiments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In online settings, with large models and data sets, this may be impractical.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TMC2007 data set [32] (TMC) is from the 7th SIAM SDM conference.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 27,
+                            "text": "TMC2007 data set [32] (TMC)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ey also used enterprise email collections, but their collection was proprietary and their focus was on recipient recommendation, rather than reply behavior modeling.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use this collection of ground truth distances, {\u03c1 GT i } L i=1 , to compute an L 1 loss: It differs from the loss employed by DSAC, which used a SSVM hinge loss to optimize for intersection-over-union.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We applied the SpRBM and CaRBM to the NIPS dataset 2 , which consists of 13649 words and 1740 papers from NIPS conferences from 1987 to 1999.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 52,
+                            "text": "NIPS dataset 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A two-dimensional click model was trained on this high-resolution QAC dataset, revealing users' behaviors such as horizontal skipping bias and vertical position bias.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 77,
+                            "text": "QAC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments in \u00a77.2 and \u00a77.3 are reported on the TREC 3 Robust04 document collection of newswire articles.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 84,
+                            "text": "TREC 3 Robust04 document collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the non-digit datasets, we train the proposed STA models with backbone network VGGNet",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrated the effectiveness of HIM's by applying them to the challenging task of segmentation and labeling of the public MSRC image database.",
+                    "annotation_spans": [
+                        {
+                            "start": 127,
+                            "end": 146,
+                            "text": "MSRC image database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the suitable size of k for feature coverage depends on the real dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The average classification accuracies for the WebKB data set are shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 60,
+                            "text": "WebKB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MNIST-CP consists of 50K and 10K training and testing examples of 2D contour points extracted from the MNIST dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 116,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 8,
+                            "text": "MNIST-CP",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We trained both plain and highway networks of varying varying depths on the MNIST digit classification dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 110,
+                            "text": "MNIST digit classification dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the offline process, the image database first goes through face detection to identify and locate frontal faces in the images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In other words, 25 terms are enough for expanding the query in most collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given that these properties are noticeable statistics of the dataset's conditional answer distribution, not even depending on the specific images, we would expect a sound method to achieve higher scores.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One pitfall of this ad hoc practice is that a dataset could be an agglomeration of several distinctive domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "presents a summary of the TREC corpora used in our experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 39,
+                            "text": "TREC corpora ",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Netflix: This is the classic movie rating dataset used in the Netflix challenge 5 .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 7,
+                            "text": "Netflix",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the 1000-image, 1024-bit test dataset, we test two additional cameras and two additional displays.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The number of pixels along the equator of all datasets became equal by configuring the size of cube map image as 6 \u00d7 20 \u00d7 20 pixels and the size of ERP image as 40 \u00d7 80.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Lastly, while the methods discussed assume some dataset structure, we demonstrate through the climate modelling problem that with a little insight, structure can be found in many applications to enable exact GP modelling on massive datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is composed of 340,796,067 records with 30,717,251 unique queries, retrieving 10 URLs each.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the ActivityNet Captions dataset , we collected entitylevel bounding box annotations and created the ActivityNet-Entities (ANet-Entities) dataset 2 , a rich dataset that can be used for video description with explicit grounding.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 32,
+                            "text": "ActivityNet Captions dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 101,
+                            "end": 145,
+                            "text": "ActivityNet-Entities (ANet-Entities) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GEMs provide a framework that generalizes many of the afore-mentioned history-dependent models for event datasets, many of which make the assumption of piece-wise constant conditional intensity functions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a dataset D, a minimum support threshold min supp, and a maximum memory size M em size , we must create the minimum number of partitions such that they can be entirely mined in at most M em size bytes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example,  proposes the Food-101 dataset, containing around 101,000 images of 101 different categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 43,
+                            "text": "Food-101 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "[0, 1] is the calibration factor tuned on a held-out validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The learner's performance loss with respect to the oracle benchmark is defined as the regret, whose value at time T is given by A regret that grows sublinearly in T , i.e., O(T \u03b3 ), \u03b3 < 1, guarantees convergence in terms of the average reward, i.e., R(T )/T \u2192 0.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If it is accepted that the document collection should be regarded as sampled in some way from a population, then as indicated above, each per-topic measurement has some builtin error of estimation because the measurement is based on the sample rather than the population.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TF-IDF outperformed BM25 for the WBR99 collection, probably due to the way that the relevance information was obtained.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 49,
+                            "text": "WBR99 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct extensive experiments on the Shang-haiTech, WorldExpo'10, UCF CC 50, and UCSD datasets, and demonstrate the effectiveness and efficiency of PACNN over the state-of-the-art.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 97,
+                            "text": "Shang-haiTech, WorldExpo'10, UCF CC 50, and UCSD datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On larger experiments using the MNIST and the SARCOS datasets we show that our method can provide superior performance to previously published scalable approaches that have been handcrafted to specific likelihood models.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 61,
+                            "text": "MNIST and the SARCOS datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the log ( ) of Neill's scan statistic on Dataset1 is not directly comparable with others because of different probabilistic model, in general, GridScan(-Pro) always finds the largest maximum log ( ) in all overdensity tests, which indicates the advantage of GridScan(-Pro)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This data set covers four categories: 'autos', 'motorcycles', 'baseball', 'hockey', each with 988, 993, 992 and 997 documents respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Beyond the ablation studies we performed on the CIFAR-10 dataset, we also conduct the experiment on large scale ImageNet dataset with ResNet-18/34/50 (type-b residual connection) network structures.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 64,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 112,
+                            "end": 128,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this setting, there : Predictive performance on test sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, it needs to be trained previously on a data set D 1 with predictors X 1 and corresponding labels y 1 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The papers from the Image Search, Indexing, Retrieval Models, Test Collections, and Web Queries sessions all used public datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Interestingly,  showed that a simple baseline of an RNN with an MLP decoder outperformed many of these prior works on the TrajNet benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 122,
+                            "end": 139,
+                            "text": "TrajNet benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MNIST digit dataset contains 60,000 training and 10,000 test images of ten handwritten digits (0 to 9), with 28\u00d728 pixels.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "MNIST digit dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MSN test set containing 2421 queries.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 12,
+                            "text": "MSN test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply three preprocessing steps to make these four datasets more practical and representative: (1) We exclude queries that did not receive any click or received clicks for every candidate business, since these queries will not influence the average ranking performance in the experiments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since MAD-FULL and MAD-RAND feature non-deterministic sampling steps, we report their average performance obtained across 5 independent runs on the whole dataset with the corresponding standard deviations between brackets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to previous formulation using Parzen density estimation, large databases become now a possibility.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This has motivated a considerable amount of work on scalable approximate GP methods that can be applied to large-scale datasets  and even though signicant progress has been made on this topic, current methods cannot generally achieve signicant gains in scalability without a noticeable deterioration in accuracy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These are the first test collections to be constructed for IR experiments in these languages (except Hindi, which was addressed in the TIDES surprise language exercise ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "max k=1,...,K e f k (x) K l=1 e f l (x) ), denoted as MMC, on the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recall that x t is firstly fed into a sampler \u03c6 spl to produce a collection of proposals (denoted asX",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where the i.i.d. noise follows \u223c N (0, \u03c3 2 ), the predictive distribution for a testing point x * conditioned on the dataset D, i.e., p(y * |D, x * ), conforms the Gaussian distribution N (\u00b5(x * ), \u03c3 2 (x * )), where Here, we use \u03c3 2",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Data sets: We use a versioned document collection from Wikipedia (WIKI) and a data set from the Internet Archive.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, it is worthy of notice that, since real datasets do not include ground truth images for training and testing, we use their APS images as ground truth for training purposes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, we conducted the same experiment on a larger set of images, consisting of 65 of the CMU+MIT database images 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 106,
+                            "text": "CMU+MIT database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, the query \"point out the steps to make the world free of pollution\" on the WT10G collection consistently performed poorly in our DAAT framework.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 104,
+                            "text": "WT10G collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The important question, however, is whether these noisy projected judgments are useful for training ranking models in the target corpus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We annotate 3 datasets for analyzing the relationship between the deep model's inner representation and the visual saliency in the image.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One can expect worse results, but this is a more realistic strategy for very large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiment, 10-fold cross validation is performed on the small data set to evaluate the multilabel classification performances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Without test collections containing ambiguous topics with associated relevance judgements that reflect a range of interpretations of that topic, the worth of much of the work described here may not be fully understood.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present qualitative results of the RevGAN model on the Maps dataset in  and on the Cityscapes dataset in .",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 70,
+                            "text": "Maps dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 104,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare our model with both the classical and variable memory length Markov models on three data sets with different memory and stochastic components.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the algorithm on the half-billion page English-language subset of the ClueWeb09 collection, and evaluated its effectiveness in detecting web spam by manually inspecting small samples of the detected quilted pages.",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 100,
+                            "text": "ClueWeb09 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the retrieval performances of the baseline (BM25), using faceted feedback (FF) from individual user (User1, 2 and 3 for OHSUMED dataset, User4, 5 and 6 for RCV1 dataset), and the average over three users (FF(Average)).",
+                    "annotation_spans": [
+                        {
+                            "start": 126,
+                            "end": 141,
+                            "text": "OHSUMED dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 162,
+                            "end": 174,
+                            "text": "RCV1 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moving Object Segmentation Benchmarks: We compare our approach with 8 different baselines on the task of moving object segmentation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on toy data and real world data sets illustrate the benefits of this approach.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This model has been chosen for the Graph500 benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 53,
+                            "text": "Graph500 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we use the much larger NTCIR-11 arXiv collection to test the scalability of Tangent-3; this collection is 174 GB uncompressed, with 8,301,578 documents (arXiv article fragments) and 60 million formulae including isolated symbols.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 61,
+                            "text": "NTCIR-11 arXiv collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used MNIST , fashion-MNIST , SVHN , CIFAR10, CIFAR100 , and ILSVRC2015  as datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 13,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 16,
+                            "end": 29,
+                            "text": "fashion-MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 32,
+                            "end": 36,
+                            "text": "SVHN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 39,
+                            "end": 46,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 48,
+                            "end": 56,
+                            "text": "CIFAR100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 63,
+                            "end": 73,
+                            "text": "ILSVRC2015",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i 's) form a very rich collection of statistics, but, for lack of space, we restrict our attention to the two-sample Wilcoxon statistic in this paper.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "8 Summary This paper has described a detailed permutation-based procedure for detecting the presence of significant cluster structure in a dataset using unsupervised, partitionbased clustering algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Overall, the distributions have a shape which complements the uniform, linear, and exponential shapes of distributions in the synthetic datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , our DM-GAN model achieves 4.75 IS on the CUB dataset, which outperforms other methods by a large margin.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 66,
+                            "text": "CUB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A subset of 40 utterances from the training material forms the adaptation training set, to be used for speaker adaptation/normalization purposes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is said to be right-censored if and only if y",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The system detected 7481 entity occurrences in the collection: 26% persons, 10% locations, 57% organizations, and 7% products.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, all the paired image-sentence data from held-out COCO training set are leveraged to optimize our novel object captioning system.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 72,
+                            "text": "COCO training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results for two synthetic datasets each of size = 2, 500 and with = 150.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "demonstrates the capability of CP-stream to discover events in a real-world streaming dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to Twitter's restrictions, it was not possible to extend the available datasets by additional months, since given an entity you can collect the tweets that are published no more than two weeks earlier.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We find that pre-training on all three datasets is beneficial for hand and object reconstructions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This metric is not applicable to UCF dataset because most action instances in UCF dataset start and end at the first and last frame, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 44,
+                            "text": "UCF dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 78,
+                            "end": 89,
+                            "text": "UCF dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Another important issue is that most of the in-core FCIM algorithms usually keep the entire collection of frequent closed itemsets mined so far in main memory, for checking whether an itemset is globally closed or not.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, www.espn.com is a high-level web domain containing a collection of webpages centered around the Sports topic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method is trained on KITTI VO dataset and tested on the selected unseen KITTI 141 subset without any finetuning.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 41,
+                            "text": "KITTI VO dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 76,
+                            "end": 92,
+                            "text": "KITTI 141 subset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The resolution is 1500x1500. DRIVE [39] is a retinal vessel segmentation dataset with 20 images.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 34,
+                            "text": "DRIVE",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This shows that with high \u03ba the portfolio is focusing on a few groups to invest in and  : Risk comparison between competing algorithms for the NYSE dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 143,
+                            "end": 155,
+                            "text": "NYSE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows P @5 results for all methods, datasets and scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is an online method that combines deep learning features with Kalman-filter-based tracking and the Hungarian algorithm for data association, achieving remarkable performance on the MOTChallenge MOT16 benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 181,
+                            "end": 209,
+                            "text": "MOTChallenge MOT16 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The size of input patch is 36 \u00d7 36 pixels on the LSP dataset, and 54 \u00d7 54 pixels on the FLIC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 60,
+                            "text": "LSP dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 88,
+                            "end": 100,
+                            "text": "FLIC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Query results reflecting the current state of the document collection can be obtained in these approaches by executing queries both on in-memory and disk-resident indexes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Before engaging in the work of creating such a corpus, it was decided to explore methods of simulating such a collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments, we first evaluate SSN in the Ima-geNet classification dataset , which has 1.28M training images and 50k validation images with 1000 categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 81,
+                            "text": "Ima-geNet classification dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In practice, observed dependencies often differ between data sets, precisely because the experimental circumstances were not identical in different experiments, even when the causal system at the heart of it was the same.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "S SHReC, static SHReC built from only the Intranet document collection (see Section 4) and is not updated in any form throughout the test period.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 70,
+                            "text": "Intranet document collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This appears to be a reasonable strategy in the face of such queries, however, there are no publicly available test collections that contain ambiguous queries in order to test such a strategy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This includes generating different windows or architectural styles on the labels2facades dataset, and generating different times of day on the sum-mer2winter dataset as seen in .",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 96,
+                            "text": "labels2facades dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 143,
+                            "end": 165,
+                            "text": "sum-mer2winter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we perform a number of synthetic dataset experiments in order to demonstrate proof of concept and also to offer empirical evidence supporting our bounds above.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 62,
+                            "text": "hetic dataset expe",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, if the storage of the probability density function requires M times the storage of a single value, then (for a data set with N records and d dimensions) the total storage requirement of the probability density functions in the original data set is given by N \u00b7 M \u00b7 d.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 3.1.2 we move our analysis to the global Desktop collection and investigate expansions based on co-occurrence metrics and external thesauri.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 70,
+                            "text": "global Desktop collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform experiments on two large stereo images datasets, i.e. KITTI  and Cityscapes .",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 70,
+                            "text": "KITTI",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 76,
+                            "end": 86,
+                            "text": "Cityscapes",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, traditional models trained over large datasets may fail to recognize highly predictive localized effects in favour of weakly predictive global patterns.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then proceed to develop two alternative formulations;  We can also relax the dataset structure requirements by assuming that groups of inputs form a grid.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Wikipedia corpus is the entire Wikipedia corpus downloaded and indexed by Indri.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 16,
+                            "text": "Wikipedia corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 31,
+                            "end": 47,
+                            "text": "Wikipedia corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, since much effort was dedicated to collecting the dataset, little analysis of such a holistic system was provided.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate GDWCT with various datasets including CelebA , Artworks  (Ukiyoe, Monet, Cezanne, and Van Gogh), cat2dog , Pen ink and Watercolor classes of the Behance Artistic Media (BAM) , and Yosemite  (summer and winter scenes) datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 56,
+                            "text": "CelebA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 109,
+                            "end": 116,
+                            "text": "cat2dog",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 185,
+                            "text": "Pen ink and Watercolor classes of the Behance Artistic Media (BAM)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 192,
+                            "end": 237,
+                            "text": "Yosemite  (summer and winter scenes) datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 59,
+                            "end": 67,
+                            "text": "Artworks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experiment with two datasets: Reuters RCV1 from TREC 2002 (2.5GB, 50 title-only queries) and INEX 2005",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 105,
+                            "text": "INEX 2005",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 33,
+                            "end": 45,
+                            "text": "Reuters RCV1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 51,
+                            "end": 60,
+                            "text": "TREC 2002",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Another important example is the web collection at the Internet Archive, consisting of more than 150 billion web pages that have been crawled since 1996 -or",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we used data augmentation on the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "No manual annotation is required hence data collection is fully automatic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show the comparison results for Cora datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 45,
+                            "text": "Cora datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated our cross-lingual pairwise similarity algorithm on English and German Wikipedia, selected because they are the largest Wikipedia collections available and because significant amounts of parallel corpora exist for the language pair.",
+                    "annotation_spans": [
+                        {
+                            "start": 132,
+                            "end": 153,
+                            "text": "Wikipedia collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this dataset, we train the model with the training set and evaluate the performance with the testing set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From dataset Dn 0 ,m 0 , perform empirical AUC maximization over S0 \u2282 S, yielding the scoring function\u015d(x) =\u015dn 0 ,m 0 (x).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, we release a new benchmark dataset for demographic prediction in retail business scenario.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We measured the correlation between the document frequency of terms in the collection and the number of queries that contain a particular term in the query log to be 0.424.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our model can overfit the toy dataset, while other models simply generate random guesses and fail in learning the motion.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Performance of I-SSGPR 200 degrades as the offline training data is less representative, while LGR and LWPR perform almost equally well on KUKA 1 and KUKA 2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 139,
+                            "end": 156,
+                            "text": "KUKA 1 and KUKA 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the above operation, we obtain two data streams for each data set, called RCS-based, and PCS-based data streams.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we extended an MT-based context-sensitive CLIR approach , comparing flat and hierarchical phrasebased translation models on three collections in three different languages.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, on Fashion MNIST dataset, our approach outper-forms PixelDefend in FGSM attack and achieves an accuracy of 89.04% and 88.59% against the strongest attack for ResNet and VGG respectively",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 37,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on two challenging benchmark datasets show that our method significantly outperforms the state-ofthe-art methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We build a large and reasonable dataset for analyzing user posting behavior on Twitter.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, we use the MSLR-WEB30k dataset released by Microsoft in 2010 .",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 39,
+                            "text": "MSLR-WEB30k dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to render 2.5D RGB-D frames, we first construct a global point cloud from all of the panoramas provided in an environment from the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, with respect to this data set, the IDC algorithm is not too sensitive to an overestimation of the number NF of feature clusters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Like the silhouette coefficients s(i), Dunn's metric is not applicable to unclustered datasets:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition to frappe, we created two more datasets by converting food and comoda into implicit feedback.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As to Visual7W dataset which has multiple-choice answers provided for each question, we train the multiplechoice VQA model.",
+                    "annotation_spans": [
+                        {
+                            "start": 6,
+                            "end": 22,
+                            "text": "Visual7W dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Prediction performance in the Twitter dataset by means of average rank (AR) and success probability that the true (test) events rank among the top-1 events (Top-1).",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 45,
+                            "text": "Twitter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Three datasets, Connect-4 and Pumsb-(star), stand-out, with Slim returning 10 times more patterns.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 25,
+                            "text": "Connect-4",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 30,
+                            "end": 42,
+                            "text": "Pumsb-(star)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NUS-WIDE dataset  contains 269,648 images collected from Flickr.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "NUS-WIDE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Synthetic HR-VS is collected for training high-resolution stereo models, while the high-res real stereo (HR-RS) dataset is collected to benchmark high-resolution stereo matching methods under real-world driving scenes.",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 119,
+                            "text": "(HR-RS) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows a qualitative result of running different methods for two videos from the two tasks of 'change iPhone battery' and 'make smoke salmon sandwich' from the ProceL dataset, where all methods choose the same number of representatives (for clarity, we do not show representatives obtained from background).",
+                    "annotation_spans": [
+                        {
+                            "start": 159,
+                            "end": 173,
+                            "text": "ProceL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the CUL dataset does not contain both 0's and 1's, we sampled 0's equal to number of 1's in R from the unknown values.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 21,
+                            "text": "CUL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The individual kernel matrices Km represent similarities between all songs in the data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We considered the problem of recovering projective camera matrices from collections of fundamental matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The REDD data set contains several types of home electricity data for many different houses recorded during several weeks.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "REDD data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is assumed that the ith data set D i has",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted experiments on several TREC collections that were used in previous query-performance-prediction studies .",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 52,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Their introduced question prediction dataset is based on VisDial v0.9, along with a collected set of 100 question candidates.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 69,
+                            "text": "VisDial v0.9",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The theory is universally optimal and elegant, and we showed its practical applicability by constructing approximations to it to transfer information across disparate domains in standard UCI machine learning databases.",
+                    "annotation_spans": [
+                        {
+                            "start": 187,
+                            "end": 217,
+                            "text": "UCI machine learning databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on Multi-Type Data Sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 35,
+                            "text": "Multi-Type Data Sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Doc2vec model was trained on all of the documents in Web Track dataset and the number of vector dimensions were set to 100.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 70,
+                            "text": "Web Track dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although this makes the job of WSDDN much easier, it will miss a lot of objects in the dataset since the proposals have low recall.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, in the well known Netflix challenge dataset, 480,189 users submitted ratings on 17,770 movies, but on average a user rated only 209 movies.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 56,
+                            "text": "Netflix challenge dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first evaluation is based on the LETOR datasets , which include manual relevance assessments.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 51,
+                            "text": "LETOR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To compute these measures, we consider both the collection statistics and Wikipedia statistics, resulting in 12 sets of similarities (Dice, Tanimoto, PMI, Incidence Vectors, TFIDF Vectors, LSI Vectors)\u00d7(collection, Wikipedia).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", I n denote a collection of n images of a static scene captured respectively by projective cameras P 1 , ..., P n .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Second, comparing (2) with (1), our new dataset provides an improvement despite the difference in background, human appearance and pose distribution between our dataset and Human3.6M.",
+                    "annotation_spans": [
+                        {
+                            "start": 173,
+                            "end": 182,
+                            "text": "Human3.6M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For large scale database, exhaustive search, i.e., comparing the query with each sample in the target database, is infeasible.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The goal of the algorithm is to select from a collection of documents a small set of highly representative terms that best summarize them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset will be released to benefit the community of learning-based filming.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental results show that although there are many negative data in the Imagenet dataset, the PU classifier can successfully pick a large amount of positive data whose categories is the same as that of given data.",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 96,
+                            "text": "Imagenet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "imation based CF methods on the MovieLens (10M) and Netflix datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 68,
+                            "text": "MovieLens (10M) and Netflix datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection contains tweets about three different entities (Michelle Obama, Angela Merkel, and Angelina Jolie) and spans over nine months.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we conducted evaluation on RTF dataset , which consists of 22 real defocused images and ground truth defocus maps labeled with radii of disc PSFs.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 51,
+                            "text": "RTF dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The VOC datasets contain images from 20 classes, including people, animals (bird), vehicles (aeroplane), and indoor objects (chair), and are considered natural, difficult images for classification.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "VOC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Stated like this, it must be clear that this (the document collection part) is a rather drastic assumption.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this study, we construct the Cookpad Image Dataset, a novel collection of food images taken from Cookpad, the largest recipe search service in the world.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 53,
+                            "text": "Cookpad Image Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we propose to leverage the underlying relational information between items in a collection to make better compatibility predictions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Main clause prior probabilities were computed by using an SCFG with rule probabilities trained on the Penn Treebank version of the Brown corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 102,
+                            "end": 115,
+                            "text": "Penn Treebank",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 131,
+                            "end": 143,
+                            "text": "Brown corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "DCI CLOSED uses a limited amount of memory, very close to the size of the bitwise vertical representation of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To answer our research questions, we do the simulations using the real learning to rank data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, most existing datasets for instructional video analysis have the limitations in diversity and scale, which makes them far from many real-world applications where more diverse activities occur.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since we did not have the means to deploy a system that monitors click/skip activity and correspondingly alters search results with live users, we describe a collection of experiments on synthetically generated data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then we abandon meaningless expressions manually and obtain 47 common features of hotels from our dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper we address the above issues by proposing an entity search test collection based on DBpedia.",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 104,
+                            "text": "DBpedia",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This sub-dataset contains 2,786 audio-image pairs, where the sound-maker of each pair is individually located by three subjects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In summary, our method improves the performance over STL in all of these datasets (no negative transfer), while baseline method performs comparatively on the school dataset and performs worse on the computer dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 158,
+                            "end": 172,
+                            "text": "school dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 199,
+                            "end": 215,
+                            "text": "computer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 6 evaluates our method on a variety of synthetic and real-world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These results are consistent with the previously reported evaluation on the Yeast dataset , showing some success of Rank-SVM by reinforcing partial-order preferences among categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 89,
+                            "text": "Yeast dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to assess the difficulty of COIN, we report the performance on different tasks compared with other datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our large scale experiment we used the full MNIST data set with 60000 training examples and 10000 test examples of 28 x 28 grey value images of handwritten digits.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 61,
+                            "text": "MNIST data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experimental results on TREC and DBLP data sets suggest that the proposed method is very effective in obtaining relevant results to the querying inputs.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 57,
+                            "text": "TREC and DBLP data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The improvements in the web collections are statistically signi cant.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our camera is fixed on the windscreen and doesn't fully satisfy the requirements given by the Ackermann motion model (i.e. position on top of the back wheel axis), but-as proven in -the restrictive model is still applicable if the rotation angle \u03b8 between two camera poses is sufficiently small, which is the case in our datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, for the kinship and the UML datasets, the performance of the MAP-based tensor factorization was as good as the performance of the BCTF model, which is due to the density of these datasets: the number of observations was much larger than the number of parameters.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 44,
+                            "text": "kinship and the UML datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the R 20 datasets, we needed about  : Comparisons of the detectability of the hidden clusters, based on normalized mutual information.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments on several benchmark datasets show that our approach can significantly outperform the traditional methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To fully evaluate our method on truncated objects, we create this dataset by randomly cropping images in the LINEMOD dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 124,
+                            "text": "LINEMOD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the Caltech-101 dataset has large number of classes and the experiments are computationally intensive (100 choose 2 classifiers need to be built in each case), the results are averaged over 3 sets of training and test datasets only.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 29,
+                            "text": "Caltech-101 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We formulate our top-K ERAC detection problem as: Given an entity universe set E, a feature set F , a target collection size N (N < |E|/2) and K, find the top-K most anomalous Extreme Rank Anomalous Collections of size at most N .",
+                    "annotation_spans": [
+                        {
+                            "start": 176,
+                            "end": 210,
+                            "text": "Extreme Rank Anomalous Collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At the end, each (u, i, t) record in the training dataset is associated with a sd t u,i value.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Kinetics-Skeleton dataset, the size of the input tensor of Kinetics is set the same as , which contains 150 frames with 2 bodies in each frame.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 33,
+                            "text": "Kinetics-Skeleton dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 67,
+                            "end": 82,
+                            "text": "Kinetics is set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further investigate this choice of NN structure in Appendix C.6 for the Protein dataset (results are fairly robust).",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 90,
+                            "text": "Protein dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this series of experiments, we generate user contexts and discover Context-aware Influential Objects (limited to K=5) from the CiteSeer dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 130,
+                            "end": 146,
+                            "text": "CiteSeer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experiment with the INEX 2008 Wikipedia collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 53,
+                            "text": "INEX 2008 Wikipedia collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As part of this work, we introduce a new dataset for reflection removal that provides access to the two sub-aperture views.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Networks were trained until the training set accuracy failed to improve by at least .1% over 5 epochs, or overfitting became evident from periodic testing with the generalization test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is already preprocessed as described in",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The third dataset is food  which contains 5554 ratings by 212 users on 20 food menus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "illustrates the experimental comparisons of point cloud segmentation on the ShapeNet Part dataset, where we also employ bold numbers to represent the improvement of SRN.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 97,
+                            "text": "ShapeNet Part dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For DECO, the dataset is partitioned into m = 3 subsets and it is implemented without the refinement step.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The observed term frequency was then discounted by the prior of the expectation of appearance in a random document in the English language using the Brown corpus .",
+                    "annotation_spans": [
+                        {
+                            "start": 149,
+                            "end": 161,
+                            "text": "Brown corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The vocabulary size of our experimental data set is 554061.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Surprisingly, on the SumMe dataset, our final method outperforms most of the supervised methods (except ) by a big margin (nearly 3%).",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 34,
+                            "text": "SumMe dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As performed in , we sample the audio waveforms at 16 kHz, split the training and validation sets into half second clips, and group each sequence into bins of 200 consecutive samples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i Inspired by the positive and unlabeled learning in single-label classification , we propose a method, called PU Stochastic Gradient Descent, which can handle large-scale datasets with missing label assignments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the experiments, we report retrieval and type prediction performance using pseudo-desktop collections and a computer science (CS) collection where queries are collected using a game interface.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The parameters setting are: \u03bb = 1, r = 100 for G-DNLR and \u03bb = \u03b1 = \u03b2 = 10 \u22122 , r = 100, k = 7 for G-ALDNLR on Extended Yale B dataset; \u03bb = 1, r = 142 for G-DNLR and \u03bb = \u03b1 = \u03b2 = 10 \u22122 , r = 110, k = 6 for G-ALDNLR on CMU-PIE dataset; \u03bb = 10 2 , r = 6 for G-DNLR and \u03bb = \u03b1 = \u03b2 = 10 \u22127 , r = 10, k = 8 for G-ALDNLR on Ballet dataset; \u03bb = 10 \u22122 , r = 27 for G-DNLR and \u03bb = 1, \u03b1 = 10 2 , \u03b2 = 10 \u22125 , r = 27, k = 8 for G-ALDNLR on SKIG dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 132,
+                            "text": "Extended Yale B dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 215,
+                            "end": 230,
+                            "text": "CMU-PIE dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 314,
+                            "end": 328,
+                            "text": "Ballet dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 424,
+                            "end": 436,
+                            "text": "SKIG dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the MS-distance is more general and flexible: it supports addition of a new vector to the data set (our data structure) in O(d) time for computing the mean and the standard deviation values of the vector.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They also proposed a word hashing technique for dealing with large vocabularies that are commonly associated with Web corpora.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Roughly 10% of the queries contain typographical errors and, although the queries are collected in the EN-US region, the dataset contains a subset of queries in ES-US Spanish.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used a multi-layer perceptron (MLP) consisting of 1000-1000 hidden layer with ReLU activation, LeNet , WideResNet , DenseNet-BC , and VGG  with batch-normalization for evaluations on datasets except for ILSVRC2015.",
+                    "annotation_spans": [
+                        {
+                            "start": 206,
+                            "end": 216,
+                            "text": "ILSVRC2015",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The right two plots in  report utility on a subset of the extended Yale Face Dataset B  for face clustering.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 84,
+                            "text": "extended Yale Face Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also present results for the 4096-dimensional Images data set  with a 75-25% reference-query split.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, unlike TREC where participants are given the datasets and execute their code locally, MIREX data sets cannot be distributed due to copyright restrictions.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 110,
+                            "text": "MIREX data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we make a synthetic dataset containing 17K images by using the event camera simulator  for experiments1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the distribution of scores for a model trained on labeled WIDER-Face  and run on images from the validation split of the same dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 74,
+                            "text": "WIDER-Face",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ability to compress a dataset depends on the amount of recognisable structure.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the evaluation of ELSA with the New York Times corpus, it outperformed two simple baselines of Bag-Of-Words and LDA as well as two ESA-based methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 56,
+                            "text": "New York Times corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the proposed approach on PAS-CAL VOC 2007, 2010, 2012  and COCO , which are widely-used benchmark datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 65,
+                            "text": "PAS-CAL VOC 2007, 2010, 2012",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 71,
+                            "end": 75,
+                            "text": "COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the performance of exact Gaussian processes against widely-used scalable GP approximation methods on a range of large-scale datasets from the UCI dataset repository .",
+                    "annotation_spans": [
+                        {
+                            "start": 153,
+                            "end": 175,
+                            "text": "UCI dataset repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This data set is available for download from http://www.schonlau.net/.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TREC2004 data set are shown in , 2 and 3, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 17,
+                            "text": "TREC2004 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental results show that our method performs favorably against the state-of-the-art UDA approaches, i.e. we achieve the best-published result on the Office-31 benchmark and very competitive result on the challenging VisDA-2017 benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 159,
+                            "end": 178,
+                            "text": "Office-31 benchmark",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 226,
+                            "end": 246,
+                            "text": "VisDA-2017 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our (label) encoding scheme relies on the existence of mutually exclusive clusters of labels in real-life MLC datasets, where labels in different clusters (almost) never appear in the same label set, but labels from the same clusters can.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 118,
+                            "text": "MLC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In section 4 we experimentally compare IDDTW to DTW and Euclidean distance on real and synthetic datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the three traditional graded-relevance IR test collections (NTCIR6C, TR09DIV+gr2T and TR09DIV2T), we evaluated nDCG, Q,",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 73,
+                            "text": "NTCIR6C",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 75,
+                            "end": 87,
+                            "text": "TR09DIV+gr2T",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 101,
+                            "text": "TR09DIV2T",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then apply our bundle adjustment method to 107 000 YouTube videos from the Kinetics dataset  and generate a large-scale dataset of 3D human poses aligned with the video frames.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 94,
+                            "text": "Kinetics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Typically, users are motivated to tag as a way to organize their own personal music collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in three out of the four cases, the traces on the subsampled datasets placed MD 4 of controlled and stressed plants too close to each other",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also use ImpClarity which is a variant of Clarity proposed for Web corpora : only the terms that appear in less than t% of the documents in the corpus are used to induce a relevance language model from the list; t is a free parameter.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ResNet model is utilized as backbone and trained on Visual Genome  dataset to predict attributes and classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 78,
+                            "text": "Visual Genome  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Such selection bias is non-trivial to be removed from the collected dataset and may introduce erroneous causal relations in the results of causal discovery algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation collections for this task have been created by crowdsourcing relevance judgements .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, we detail how the first publicly available Twitter corpus",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 72,
+                            "text": "Twitter corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the WIDER dataset  we show that FA-RPN proposals are better than RPN proposals.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "WIDER dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Local PCA and lCA kernels were developed based on a database of 18680 small patches (12 pixel x 12 pixel) chosen from random locations in the Tulip1s database.",
+                    "annotation_spans": [
+                        {
+                            "start": 142,
+                            "end": 158,
+                            "text": "Tulip1s database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also show the training time of all methods in both dataset in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the same test for a sampled collection, we calculate the number of pairs in each of the following five categories: true positive: the run pair is significantly different in both collections and the collections agree as to which system is better; true negative: the run pair is not significantly different in either collection; miss: the run pair is significantly different in the base collection, but not the sampled collection; false alarm: the run pair is not significantly different in the base collection, but is in the sampled collection; inversion: the run pair is significantly different in both collections, but the collections disagree as to which is the better run.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "partition the questions into training and validation sets such that their respective answer distributions become intentionally dissimilar.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Data We benchmark various methods on 3 video datasets: the Open Video Project (OVP), the Youtube dataset , and the Kodak consumer video dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 83,
+                            "text": "Open Video Project (OVP)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 104,
+                            "text": "Youtube dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 115,
+                            "end": 143,
+                            "text": "Kodak consumer video dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the music dataset both our algorithms achieve around 20% improvement in accuracy over the factorial learning and single clustering algorithms (k-means and NNMA).",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 21,
+                            "text": "music dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted experiments with the datasets of the INEX entity ranking track of 2007 , 2008 , and 2009 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast to these, our work focuses on going deeper with the capsule networks and increase its performance on more complex datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated our augmented models on two corpora: the Hansards corpus  of English/French and the Europarl corpus  with EPPS annotation .",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 69,
+                            "text": "Hansards corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 97,
+                            "end": 112,
+                            "text": "Europarl corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our proposal has been validated by means of user studies and lab experiments using MovieLens dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 100,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For simplicity, we assume a linear history of versions; however, our techniques also apply to collections with branches (forks) in the revision history.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We introduce the MVTec Anomaly Detection (MVTec AD) dataset containing 5354 high-resolution color images of different object and texture categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 59,
+                            "text": "MVTec Anomaly Detection (MVTec AD) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 4 shows numerical results on UCI benchmark data sets, and on the above mentioned music application.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 60,
+                            "text": "UCI benchmark data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the siam-competition2007 dataset, we varied the percentage in the interval [0.05-3], and for the RCV-Topics-Subsets dataset, we varied in the interval .",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 36,
+                            "text": "siam-competition2007 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 101,
+                            "end": 127,
+                            "text": "RCV-Topics-Subsets dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we have studied how to build succinct positional full-text index structures for versioned document collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to the large quantity of vehicle IDs and images, the proposed VERI-Wild dataset poses significant challenges to vehicle ReID.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 83,
+                            "text": "VERI-Wild dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show the average per-pixel L 1 loss for each unseen test dataset for each material and the per-pixel linear RGB angular error \u03b8 rgb for the diffuse ball, a distance metric commonly used to evaluate white-balance algorithms (see Hordley and Finlayson ), in  (top).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use a snapshot of the DBLP data downloaded on May 17, 2010 to create a benchmark data set for our experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 34,
+                            "text": "DBLP data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We illustrate our method on data from a large extract of the RCV1 corpus , processed and made freely available for multiview multilingual learning experiments .",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 72,
+                            "text": "RCV1 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to validate the diversity of our relational captioner, we need to make our relational captioning dataset to have more natural sentences with rich expressions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These methods work very well for MAR data settings, because they assume that the missing features are generated by the same model that generates the observed features.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 41,
+                            "text": "MAR data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is worth noting that HITS exhibits quite poor performance (even exhibits negative correlation to AA on Amazon datasets), as it does not consider the users' reputation in ranking.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 121,
+                            "text": "Amazon datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the following discussions, we denote m, n, p, q as the number of users, items, features and opinionated phrases in a dataset, and a, b, c, d as the corresponding dimensions of latent factors for them in the learnt model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In general, all the methods considered in  have performed worse on the TU-Berlin dataset, which might be due to the large number of classes, where many of them are visually similar and overlapping.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 88,
+                            "text": "TU-Berlin dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all datasets, learning with the extracted features yields better generalized errors than learning with the original features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, this distribution may be just uniform; this is the case in one of the datasets News20 used in our experiment explained later.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 99,
+                            "text": "News20",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the remainder of the paper we first discuss related work (Section 2), outline the dataset used (Section 3), and present the proposed music recommendation models (Section 4).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ground truth of the dataset is annotated with a tooth-level bounding box, mask, and label.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All experiments are conducted for 5 times, the statistics of last 10/5 epochs' test accuracy are reported as the performance of both proposed and baseline methods in CIFAR/ImageNet datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 166,
+                            "end": 189,
+                            "text": "CIFAR/ImageNet datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the NPB and MLB data sets, decreasing the kernel width worsened classification accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 29,
+                            "text": "NPB and MLB data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, the collection needs to contain a large number of opinionated documents towards different entities.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct extensive experiments on three public shadow detection datasets, SBU, UCF and ISTD, to evaluate our method.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 79,
+                            "text": "SBU",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 81,
+                            "end": 84,
+                            "text": "UCF",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 93,
+                            "text": "ISTD",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also note that the improvement made by SI-VGRNN compared to VGRNN is marginal in Facebook dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 100,
+                            "text": "Facebook dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A NER system based on CRF model which incorporates long-distance information .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The extracted information also helped by providing the connection between users' vocabulary and the vocabulary used in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the design of the object sentiment classifier, besides the labeled emotional and neutral objects from EMOd , we further generated objects with sentiment labels from COCO attributes  datasets based on their objectlevel attributes.",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 109,
+                            "text": "EMOd",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 168,
+                            "end": 172,
+                            "text": "COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Despite their success, annotating labels at the pixel level is prohibitively expensive and time-consuming, e.g., about 90 minutes for a single image in the Cityscapes dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 156,
+                            "end": 174,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For validation, we have used four data sets -two small and two large data sets from real examples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the accuracy values for TREC-8, TREC-10, and TREC-12 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 67,
+                            "text": "TREC-8, TREC-10, and TREC-12 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To utilize the annotated data from other datasets, previous human parsing methods must be pre-trained on the other dataset and fine-tuned on the evaluation dataset, as the #4 result in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the sido data set, Adagrad (with fixed steps size, left column) achieves a good testing loss quickly then levels off, for reasons we cannot explain.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "sido data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The last dataset was synthetised using the IBM generator.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, the more a word deviates in a document from its average behavior in the collection, the more likely it is 'significant' for this particular document.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As future work, we would like to build even deeper and higher level understanding models and apply on Ima-geNet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 102,
+                            "end": 119,
+                            "text": "Ima-geNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate that a multi-scale hierarchical organization of VQ-VAE, augmented with powerful priors over the latent codes, is able to generate samples with quality that rivals that of state of the art Generative Adversarial Networks on multifaceted datasets such as ImageNet, while not suffering from GAN's known shortcomings such as mode collapse and lack of diversity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that the synthetic data set was intentionally designed in such a way that the classification accuracy was very high for low uncertainty levels, and then gradually reduced with addition of noise.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Interestingly, the Stopword approach has performance similar to N-gram in both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The three stages described above result in a collection of clusters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As thus arranged, an iteration of 3-10 E-steps combined with one M-step only requires a single pass over the user corpus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For lack of space we only report the results for #P centrality on the Quote dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 83,
+                            "text": "Quote dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and the generator network is pre-trained with our proposed semi-supervised GANs on each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, if the target dataset is small and the number of parameters is huge, fine-tuning the whole network may result in overfitting .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the reshaping process for test datasets has a critical difference from reshaping training datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MovieLens-IMDB (ML-IMDB) is a dataset extracted from the IMDB and the MovieLens-1M datasets 2 by mapping the MovieLens and IMDB movie IDs and collecting the movies that have plots and keywords.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 14,
+                            "text": "MovieLens-IMDB",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 57,
+                            "end": 91,
+                            "text": "IMDB and the MovieLens-1M datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It first computes the probability of how likely a group is talking about a query topic; it collects evidence of how knowledgeable the group is for a given query via all documents in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate SiamRPN++ on its test set with 511 videos.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Data sets OHSUMED contains features extracted from query-document pairs in the OHSUMED collection, a subset of MEDLINE, a database of medical publications.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 97,
+                            "text": "OHSUMED collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 111,
+                            "end": 118,
+                            "text": "MEDLINE",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 10,
+                            "end": 17,
+                            "text": "OHSUMED",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the NCEP/NCAR Reanalysis 1 dataset, where we considered the monthly means for 1948-present .",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 42,
+                            "text": "NCEP/NCAR Reanalysis 1 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After constructing a term vector representation for each piece in the music collection, we need a method to find those tracks that are most similar to a natural language query.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 86,
+                            "text": "music collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The challenge to build such a large-scale dataset mainly stems from the difficulty to organize enormous amount of video and the heavy workload of annotation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the Walmart.com dataset we filter out users with less than ten activities and products that interacted with less than five users.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 26,
+                            "text": "Walmart.com dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The test data set is discretized using the same threshold learned from the training data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This new Web collection finally shows the long expected value of Web link structure for ad hoc search.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply our method in two challenging real-world video datasets and show that it achieves state-of-the-art for both weakly supervised action segmentation and alignment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply causal discovery algorithms to both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "found OERs, like YouTube video, dataset, and Wikipedia page, can assist students better understand the scientific readings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is typically resulted in di erent reasonable spelling variants being fed into the nal corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 92,
+                            "text": "nal corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The baseline of the proposed method achieves mIoU 71.3% on Cityscapes test set with 100 FPS inference speed.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 78,
+                            "text": "Cityscapes test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Mutex features We used the Traffic and Temperature data sets  to evaluate the benefit of detecting mutual exclusivity.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 60,
+                            "text": "Traffic and Temperature data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider 2 OCR image data sets -Optdigits (64 dimensional)  and MNist (784 dimensional) .",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 44,
+                            "text": "Optdigits",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 67,
+                            "end": 72,
+                            "text": "MNist",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, similar to , we extract the mutual information from a corpus containing the top m retrieved documents and r \u00d7 m documents randomly selected from the collection, where r is a free parameter that controls the generality of mutual information scores.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report the frame-level mean average precision (frame-mAP) with an IoU threshold of 0.5 for both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The single-syllable network described above was tested on the collection of single-syllable words identified as irregular by .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the result, we can see this data set has highly distinguishing feature around the very beginning of the sequence.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We work just with the text content of the collection (that is, we ignore the html code from the documents).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "-The proposed model can run at a real-time speed of 26 FPS and achieves state-of-the-art performance on five large-scale salient object detection datasets including ECSSD , PASCAL-S , DUT-OMRON , HKU-IS , and test set from DUTS .",
+                    "annotation_spans": [
+                        {
+                            "start": 165,
+                            "end": 170,
+                            "text": "ECSSD",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 173,
+                            "end": 181,
+                            "text": "PASCAL-S",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 184,
+                            "end": 193,
+                            "text": "DUT-OMRON",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 196,
+                            "end": 202,
+                            "text": "HKU-IS",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 223,
+                            "end": 227,
+                            "text": "DUTS",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use five subsets of the WALS database, obtained by sorting both the languages and features of the database according to sparsity and using a varying percentage (10% \u2212 50%) of the densest portion.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 40,
+                            "text": "WALS database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "kd-trees and metric-trees on an uniformly distributed 10-dimensional dataset containing 20,029,440 points ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A distributed event is a collection of smaller, single events occurring at the same day and conforming to one overarching theme.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, most training datasets involve tens of users with a few hundreds (or more) repeated acquisitions per user.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both Wall Street J ournal data with added noise samples and the Aurora benchmark, we show that the new noise adaptive technique performs as well as or significantly better than the non-adaptive algorithm, without the need for a separate training set of noise examples.",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 84,
+                            "text": "Aurora benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We found, from a bilingual annotator who used VaporEngine to browse the Avaj Otalo collection, that an unexpectedly large number of such terms arose from speaking conventions used in announcements; which consistently included long salutations, and which were recorded by a relatively small number of speakers (e.g., agricultural outreach agents).",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 93,
+                            "text": "Avaj Otalo collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both studies, however, were on small or very small test collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": We assess the effectiveness of our method for cross-dataset evaluations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe the results of our method on LETOR 2.0 , a publicly available benchmark data collection for comparing learning to rank algorithms.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 50,
+                            "text": "LETOR 2.0",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Partition exploits a proper partitioning of the dataset, since it splits the dataset into disjoint subsets of transactions which cover the whole dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the public available Epinions dataset provided by Richardson et al.  and treated each month as a time window.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 45,
+                            "text": "Epinions dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Convolutional neural networks (CNNs) have rapidly established themselves as the de facto standard for tasks of semantic and instance segmentation, as demonstrated by their impressive performance across a variety of datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare our model with both classical MMs and VLMMs on three data sets representing a wide range of grammatical and statistical structure.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then we conduct experiments on real datasets to evaluate the accuracy and efficiency of the proposed model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "lists the percentage of relevant and candidate documents for five sample SRs taken from the CLEF 17 eHealth Task 2 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 92,
+                            "end": 122,
+                            "text": "CLEF 17 eHealth Task 2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Fashion-Gen  is a dataset of fashion products collected from an online platform that sells luxury goods from independent designers.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 11,
+                            "text": "Fashion-Gen",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the performance of this deep model with two object discovery and co-segmentation methods  in the Internet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 124,
+                            "text": "Internet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As very large data matrices have become a commodity in scientific and economic applications alike, there is an increasing demand for low-rank approximations techniques that cope with massive data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Participants were told their job was to examine news during Clinton's second term, 1996-2000, which corresponded to the date range of the test collection .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We create a training dataset about 60K event stacks with corresponding APS image pairs based on their precise timestamps, and test our method on both scenes with normal illumination and also HDR scenes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We develop an efficient variational algorithm for posterior inference, and our extensive empirical studies on large-scale MovieLens and EachMovie data sets appear to justify the aforementioned dual advantages.",
+                    "annotation_spans": [
+                        {
+                            "start": 122,
+                            "end": 155,
+                            "text": "MovieLens and EachMovie data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is because each partition does not enclose knowledge about the global collection of closed itemsets, and therefore it is not possible to locally understand whether an itemset is globally closed or not.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the result we can see the significant improvements brought from our approach on almost all the measures and datasets, which shows the effectiveness and robustness of our approach.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Ablation analysis with different core-components of our face detector on the hard-set of the WIDER dataset (hard-set contains all images in the dataset).",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 108,
+                            "text": "WIDER dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent developments have shown ways to use multiple data sets, provided they originate from identical experiments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GRLSI, RLSI, GNMF, and NMF were trained on Web-I dataset with the same parameter settings in Section 7.1.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 56,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used three test collections in this experiment: TREC disks 4 & 5, WT2G, and WT10G.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 67,
+                            "text": "TREC disks 4 & 5",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 69,
+                            "end": 73,
+                            "text": "WT2G",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 79,
+                            "end": 84,
+                            "text": "WT10G",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our dataset is divided into two categories -controlled",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar to our dataset, ShanghaiTech Part B is also a dataset with surveillance view.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 43,
+                            "text": "ShanghaiTech Part B",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In supervised machine learning one tries to learn a strong prediction model based on a labeled data set D with n instances and m predictors X \u2208 R n\u00d7m with corresponding labels y \u2208 R n that minimizes a given loss function L for future data instances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to learn the importance weights of different kernels for the retrieval tasks, we assume a small training data set is available for our learning task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TS3D  is based on SSC-Net, and utilizes an additional network to incorporate the color information into the learning loop.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 4,
+                            "text": "TS3D",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 18,
+                            "end": 42,
+                            "text": "SSC-Net, and utilizes an",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consequently, our simulator can cancel out the influence of unknown confounders, such as the age and the occupation of a patient, and other practical issues in the real dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the random walk dataset, we randomly generated sequences, also of length 512.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 27,
+                            "text": "random walk dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Considerable efforts have been devoted to collection selection algorithms, e.g., CORI , ReDDE , and hierarchical database selection algorithm .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i, j, compute their 2 \u00d7 2 joint distribution in the image corpus, then compute the mutual information of that distribution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the mnist-back-image data set, the best MKM used an n = 0 arc-cosine kernel and 300-50-130-240-160-150 features at each layer.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 33,
+                            "text": "mnist-back-image data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Aamzon and Bookcrossing datasets, we find that L1/L2-MAX, and L1/L2-AVG are significantly more robust than the other algorithms.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 35,
+                            "text": "Aamzon and Bookcrossing datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar to dynamic pruning strategies such as MaxScore  over posting lists from the Information Retrieval area, and the Threshold algorithm  from database area, our algorithm is composed of filtering (Line 2 to 7) and refinement (Line 18 to 23) based on LEVI, as shown in Algorithm 1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the generated dataset, we implement the nonlocal neural networks  which is commonly used for action recognition task and pretrain it on both the SURREAL and SURREAL++ dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 157,
+                            "end": 186,
+                            "text": "SURREAL and SURREAL++ dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all three, we use the features provided with the datasets, which include color histograms, SIFT, PHOG, and others (see  for details).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Robustness: To evaluate the robustness of an algorithm, we add to the original datasets three types of spamming users, who give ratings to a random set of objects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The main goal of the SemSearch Challenge is to create evaluation collections for the task of Ad-hoc Object Retrieval  on the Web of data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method still outperforms the GazeNet+, but the improvement is not as large as in the within-dataset evaluation setting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand,  requires a polynomial number of data points in the ambient space dimension, which often cannot be met in high-dimensional datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The baseline shows superior performance because there are many more relevant documents in the TD datasets, and many of them are not presented to the user by the production ranker.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 105,
+                            "text": "TD datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments are run on two datasets with associated workloads: the GOV2 dataset is 426GB of data in 25.2 million documents using the TREC '04-'06 query topic workload (701-850) and the ClueWeb09b dataset is 1.39 TB of data in 50.2 million documents using the TREC '10-'12 query topic workload (51-200).",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 83,
+                            "text": "GOV2 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 189,
+                            "end": 207,
+                            "text": "ClueWeb09b dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, we consider collections of multiview videos registered in time and space, where 3D pose estimates can be obtained via triangulation of 2D poses.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given an event data set D and a target label X, FBS first initializes the parent set U to be empty.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset results in 8 iterations of the outer-loop in the apriori association mining algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The aim is to capture global repetition across a collection that adaptive compression algorithms do not detect.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training set contains 50,000 images and the test set contains 10,000 images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform the task of kiss detection and localization on this dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we have illustrated the effectiveness of the approach on the 20-and 25-dimensional synthetic data sets with increasing uncertainty level.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "wikiSearch, is a search engine customized for the Wikipedia corpus but with design features that may be generalized to other search systems.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 66,
+                            "text": "Wikipedia corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In general, we follow the same splits as used in  that include 140,110 training and 8,670 test pairs, where clothing and models do not overlap between train and test sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We hope to expand these experiments to larger datasets and deeper models in future work.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NIPS dataset 3 consists of the full text of the 12 years of proceedings from 1987 to 1999 Neural Information Processing Systems (NIPS) Conferences.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "NIPS dataset 3",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Fixing |V | = 1, 000, 000 and k = 10, we generate a synthetic dataset by increasing the graph density from 2 to 25.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive evaluation on the Princeton RGB-D tracking and STC Benchmarks shows OTR outperforms the state-of-the-art by a large margin.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 71,
+                            "text": "Princeton RGB-D tracking and STC Benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "EM-DD is relatively insensitive to the number of relevant attributes in the data set and scales up well to large bag sizes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the size of the dataset), and D is equidistributed across all MPI processes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reason is that the values of |D| ut(D) (see Equation ) are very close to each other for different documents, especially in newswire collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our analyses reveal that: 1) some visual salient regions (e.g. head, text, symbol, vehicle)  are already encoded within various layers of the network pre-trained for object recognition, 2) using modern datasets, we find that fine-tuning pre-trained models for saliency prediction makes them favor some categories (e.g. head) over some others (e.g. text), 3) although deep models of saliency outperform classical models on natural images, the converse is true for synthetic stimuli (e.g. pop-out search arrays), an evidence of significant difference between human and data-driven saliency models, and 4)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We repeat the sample collection 25 times for each dataset to obtain variance estimates on posterior quantities of interest.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reason for the slightly better RMSE score achieved by the two systems on the Netflix dataset are twofold.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 96,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results show that sequential attention can be well adapted for the AS task and outperform other attention mechanisms on different QA datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The PCD dataset  is a dataset for detecting scene changes from a pair of street view panorama images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "PCD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically a Sampling module produces a collection of high quality proposals, facilitated by a multiple choice adversarial learning scheme, yielding diverse frame proposal set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The importance of this model cannot be understated -it has been chosen to create graphs for the Graph500 supercomputer benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 128,
+                            "text": "Graph500 supercomputer benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To capture the dynamic nature of sentiments expressed on social media, a collection that spans over a long period of time is needed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike other RGB-D datasets  that capture RGB-D videos around the scene, the AVD uses a robotic platform to visit a set of discrete points on a rectangular grid with a fixed width of 300mm.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 27,
+                            "text": "RGB-D datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The simulator generates less unlikely diagnostic records than those in the real datasets, which may be due to the missing and noisy labels in the real-world data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our algorithm runs efficiently on the CRU dataset and converges within one and a half hours in a Linux workstation.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 49,
+                            "text": "CRU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this study, we use the dataset released by this task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since Pascal Faces and AFW contain low-resolution images and also do not contain faces as small as the WIDER dataset, we do not perform inference on the 1800 \u00d7 2800 resolution.",
+                    "annotation_spans": [
+                        {
+                            "start": 6,
+                            "end": 18,
+                            "text": "Pascal Faces",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 23,
+                            "end": 26,
+                            "text": "AFW",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 103,
+                            "end": 116,
+                            "text": "WIDER dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, the values of entry-wise PDFs can be used to guide adaptive exploration of the hidden datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use real data from gene expression data sets and well-known UCI Repository .",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 77,
+                            "text": "UCI Repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The complete data set can be downloaded and the vast majority of pages relating to ambiguity can be identified relatively easily, by either matching the string \"_disambiguation\" in the title of the article or, more commonly, finding the \"{{disambig}}\" template tag.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use images with groundtruth labels that record geometric attributes, specifically the multi-view face images from the Multi-Pie dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 138,
+                            "text": "Multi-Pie dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Suppose we are given an unlabeled dataset of N feature vectors (datapoints)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although Covtype dataset also has low outlier rate, the instance ranking by Isolation Forest has a quite low AU-PRC score, and outliers are distributed in a long tail in the ranking.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 24,
+                            "text": "Covtype dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next we report on experiments using a computer science (CS) collection, where the documents of various types are collected from many public sources in the Computer Science department the authors belong to.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that, the human performance on this dataset is 0.813.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Submitted systems may use any public or private data when developing their systems, with a few exceptions: Systems may only use the SuperGLUE-distributed versions of the task datasets, as these use different train/validation/test splits from other public versions in some cases.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "How to speed up these algorithms for handling large-scale dataset (since they usually require the computation of matrix inverse).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Except for Q1 on Yelp dataset where EFM is slightly better than MTER, our proposed solution apparently outperforms both baselines in all aspects of user study, which is also supported by the paired t-test.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 29,
+                            "text": "Yelp dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the vehicle prediction domain, our model achieved state-of-the-art results at long-range prediction of vehicle trajectories in the NGSIM dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 134,
+                            "end": 147,
+                            "text": "NGSIM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we study the robustness to sparse perturbations of an adversarially trained ResNet-18 network on the CIFAR-10 dataset, using`\u221e perturbations.",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 126,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They also show that the best run employs sophisticated methods of retrieval using two complementary indices, one constructed by extracting terms from the patent collection and the other built from external resources such as Wikipedia.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1) For each dataset, SR # significantly achieves \u223c0.95 coverage of common multi-hop in-neighbors, much superior to JSR/LSR (\u223c0.20), SR",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "focus on Synthetic-to-Real visual image translation and a benchmark called VisDA  has been proposed recently.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 80,
+                            "text": "VisDA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested our model on three data sets with different memory and stochastic components.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NOAA dataset contains eight features: temperature, dew point, sea-level pressure, visibility, average wind speed, max sustained wind speed, minimum temperature, and maximum temperature.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "NOAA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experiment with two datasets: Reuters RCV1 from TREC 2002 (2.5GB, 50 title-only queries) and INEX 2005",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 45,
+                            "text": "Reuters RCV1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 95,
+                            "end": 105,
+                            "text": " INEX 2005",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments, we show that the proposed approach outperforms other approaches that are evaluated on the dataset like classification or ranking approaches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It has been shown that networks of spiking neurons that use this synaptic plasticity model can learn to classify complex patterns of spike trains ranging from stimuli generated by auditory/vision sensors to images of handwritten digits from the MNIST database .",
+                    "annotation_spans": [
+                        {
+                            "start": 245,
+                            "end": 259,
+                            "text": "MNIST database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Synthetic spatio-temporal datasets were generated using a method similar to the spatial data generator used in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the experimental results on Clickture dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 51,
+                            "text": "Clickture dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The PSNR vs. runtime of state-of-the-art deep learning motion deblurring methods and our method on the GoPro dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 116,
+                            "text": "GoPro dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The image benchmark USPS 2 is used for this experiment, which contains 9298 images of handwritten digits from 0 to 9 of size 16 \u00d7 16, with 1553, 1269, 929, 824, 852, 716, 834, 792, 708, and 821 instances of each digit respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 26,
+                            "text": "image benchmark USPS 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 4 reports extensive experimental results on real-world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "BaiduQA dataset is a collection of 648, 514 questions crawled from a popular Chinese Q&A website  .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 15,
+                            "text": "BaiduQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the last layer usually learns the general distribution of the dataset, IRG of the last layer is less correlated with the network design.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conclude by discussing how the approach can be extended to allow parts-based object recognition, wherein an object is modeled as a collection of local features (or \"parts\") and their relative transformations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, for the Gov2 collection, the H-WSD method improves the performance (in terms of MAP) for 60% of the queries compared to the WSD baseline, while hurting only 30% of the queries.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 37,
+                            "text": "Gov2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where N is the number of documents in the collection, n is the number of documents containing qi",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We calculate the cross-entropy loss (L ce ) with respect to the samples of the reference dataset and both the membership loss (L m ) and the cross-entropy loss with respect to the samples of known classes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We revisited the dataset from Smith et al. , consisting of 109 genetically diverse yeast strains, each of which has been expression profiled in two environmental conditions (glucose and ethanol).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The aset400 data set  contains 16,385 relative comparisons over 412 artists.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "aset400 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On Reuters' data set, because there are too many relevant documents for each topic in the corpus, we used a random sample of 10% of each topic for training, and 10% of the remaining documents for testing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset consists of 2,501 train images, 2,510 validation images, and 4,952 test images, while PASCAL VOC 2012 dataset contains 5,717 train images and 5,823 validation images.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 117,
+                            "text": "PASCAL VOC 2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the smaller Wikipedia corpus, the online algorithm finds a better solution than the batch algorithm does.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 31,
+                            "text": "Wikipedia corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This collection is associated with the maximal IFS of {\"Number of words\", \"Top 100 corpus precision\", \"Independent LH\" }, where corpus precision refers to the fraction of words that appear in the set of popular terms, and Independent LH is a measure of the independence of the distribution of trigrams.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "et al measured effectiveness using an in-house test collection; Chen and Karger found a number of creative ways to test their system using TREC data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A video of the performance of our method on the KITTI dataset is available at https: //youtu.be/Bskd0Z7eLFE.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 61,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When the parents U of all nodes X are known, the log likelihood of an event dataset given a PGEM can be written in terms of the summary statistics of counts and durations in the data and the conditional intensity rates of the PGEM: where N (x; u) is the number of times that X is observed in the dataset and that the condition u (from 2 |U| possible parental combinations) is true in the relevant preceding windows, and D(u) is the duration over the entire time period where the condition u is true.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "app datasets with n l elements, L = {(x 1 , z 1 , y 1 ),",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use three consecutive windows wi, wi+1, wi+2 to form, respectively, the training, historical and test datasets for one experimental run.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compute adapted parameters using gradient descent : The Mini-Imagenet dataset was proposed by  and it consists of 100 classes from Imagenet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 131,
+                            "end": 147,
+                            "text": "Imagenet dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 56,
+                            "end": 77,
+                            "text": "Mini-Imagenet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In what follows, we assume that we are given a labeled dataset where (x i , y i ) stands for the ith instance (feature vector) and its class label is y i , as well as a set of tree-structured taxonomies {T t } T t=1 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(c), we compared the performances with datasets of different number of event types.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results reported for the GOV2 collection in  give an index of size 30,310 MB that includes docIDs and frequencies, but not the text needed for snippet generation, making this approach uncompetitive for our scenario.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 44,
+                            "text": "GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the net- work of  was trained exclusively on NYUv2, while our network was trained on a set of datasets excluding NYUv2 with different cameras and data distributions (some of the datasets are outdoors, see  and ).",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 60,
+                            "text": "NYUv2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 123,
+                            "end": 128,
+                            "text": "NYUv2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, a troubling practice in visual domain adaptation research is to equate datasets with domains and study the problem of cross-dataset generalization or correcting dataset bias .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and we show their retrieval effectiveness on CLEF-IP 2010 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 65,
+                            "text": "CLEF-IP 2010 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence, to obtain favorable performance on a target dataset whose feature space or distribution is different from the source data, one may need to recollect labeled training data manually and then retrain the models on it.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With the availability of large scale datasets like WIDER  which contain many small faces in high resolution images, multiple new techniques for face detection have been proposed .",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 56,
+                            "text": "WIDER",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The log of the score is linear in x: and q j = log\u03b1 j \u2212 log \u03b1 j \u2212 log\u03b2 j + log \u03b2 j (14) If we put the entire data set D into one large matrix X with J columns, we can compute the vector s of log scores for all points using a single matrix vector multiplication For sparse data",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our dataset contains the same repeated transactions, so calculating the attention weight using Equation (4.2) and",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "collection contains corporate emails from a defunct information technology company referred to as \"Avocado\".",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ImageNet dataset is a large-scale image classification dataset, which contains about 1.28 million color images for training and 50,000 for validation.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 16,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, we investigate the time taken by our divide and conquer algorithm on large real and synthetic datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TREC Legal track datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 25,
+                            "text": "TREC Legal track datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have studied reusability of the TREC 2014 Contextual Suggestion open test collection in terms of the reusability of the judgments to evaluate non-pooled runs and in terms of fraction of judged venues.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 87,
+                            "text": "TREC 2014 Contextual Suggestion open test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar to the original YOLOv3 paper , we conducted several experiments on MSCOCO2017 test-dev dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 102,
+                            "text": "MSCOCO2017 test-dev dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We trained adversarial nets an a range of datasets including MNIST , the Toronto Face Database (TFD) , and CIFAR-10 .",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 66,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 100,
+                            "text": "Toronto Face Database (TFD)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 107,
+                            "end": 115,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generated a spatial dataset per each time slot.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After the award contest phase of the Challenge is completed, this dataset will continue to be used by the evaluator (Sec.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our approach for various targets including 3D shape datasets as well as real images and partial points scans.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Further, we present extensive reconstruction results in videos from two popular video segmentation benchmarks, VSB100  and Moseg , that contain videos from Hollywood movies and Youtube.",
+                    "annotation_spans": [
+                        {
+                            "start": 111,
+                            "end": 117,
+                            "text": "VSB100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 123,
+                            "end": 128,
+                            "text": "Moseg",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A possible way is to combine partitions of datasets which have been projected over contiguous items in the total order",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We filtered out documents not belonging to MIME-types text/plain and text/html to obtain a dataset that totals 0.4 TBytes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, the dataset is divided into the intermediate and the advanced sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While UCI datasets are useful for benchmark purposes, rcv1 and Big_Ads are more interesting since they embody real-world datasets with large numbers of features, many of which are less informative for making predictions than others.",
+                    "annotation_spans": [
+                        {
+                            "start": 6,
+                            "end": 18,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 54,
+                            "end": 58,
+                            "text": "rcv1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 63,
+                            "end": 70,
+                            "text": "Big_Ads",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Movie-Lens (10M) and Netflix datasets are used in this study.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 41,
+                            "text": "Movie-Lens (10M) and Netflix datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is also a new movie rating dataset, however, it is collected from social media, like twi er.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The validation accuracy on MSVD-QA dataset increases from 0.298 to 0.306 when the number of reasoning iteration L increases from 1 to 3, and seems to saturate at L = 5 (0.307), while drops to 0.304 at L = 7.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 42,
+                            "text": "MSVD-QA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While supervised learning with direct 3D supervision has shown promising results, its modeling capabilities are constrained by the quantity and variations of available 3D datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MemeTracker dataset records the times when different blog and news websites post \"memes\".",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "MemeTracker dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Every year more than 25 test collections are built among the main Information Retrieval (IR) evaluation campaigns.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is difficult to stabilize the adversarial training especially on a large-scale dataset like ImageNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 103,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate the effectiveness of our method in a semi-supervised learning (SSL) context, we performed experiments on some benchmarks datasets for SSL described in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the effectiveness of our thLDA model on both a microblogging dataset and standard text collections including AP and RCV1 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 144,
+                            "text": "AP and RCV1 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, if the time period of the Twitter background dataset does not match that of the testing datasets form which we extract the topics, then the trained WE models may not adequately capture the semantic similarity of words, as is likely the case for GloV e.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 62,
+                            "text": "Twitter background dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this task, we first train our proposed DLOW model using the GTA5 dataset as the source domain, and Cityscapes as the target domain.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 75,
+                            "text": "GTA5 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 102,
+                            "end": 112,
+                            "text": "Cityscapes",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover,  shows that the appearance frequencies of diagnostic labels in the real dataset decay exponentially, which means that many diagnostic labels only appear in few patient diagnostic records.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the performance of our proposed model on various high-dimensional real-world microarray gene expression benchmark datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, this dataset does not contain information (e.g., pmids) of all candidate documents of its SRs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we additionally demonstrate the efficacy of coordinate-based texture inpainting idea on the face texture inpainting task for in-the-wild new view synthesis of faces, using the 300-VW dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 189,
+                            "end": 203,
+                            "text": "300-VW dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For well-separated synthetic datasets, we let increase from 10 2 to 10 6 and plot the results in (a).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the normalization makes the values of each feature and the target value in the data set have zero mean and unit variance, the initial values of the iteration only depend on the mean and variance of the selected distribution, denoted as \u00b5 and s 2 , respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The BOLT IR test collections are straightforward to use to measure the effectiveness of post and thread retrieval.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 28,
+                            "text": "BOLT IR test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The key idea of generating hard negative samples has significantly improved the state-of-the-art performance on vehicle ReID benchmarks.",
+                    "annotation_spans": [
+                        {
+                            "start": 120,
+                            "end": 135,
+                            "text": "ReID benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The synthesized mask and the image imply that our model focuses on filling in occluded or disoccluded regions, separating the foreground region      : Video prediction results on UvA-NEMO and MGIF datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 179,
+                            "end": 205,
+                            "text": "UvA-NEMO and MGIF datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although CLM also performs well on these datasets, it cannot beat TMSA as it does not encode the topical information explicitly into the word representation learning process.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "illustrates the number of blogs involved in the top three topics in the collection based on their propagation level.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to the Mall dataset , it poses more challenging situations with severe perspective distortion and diverse scenes.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 28,
+                            "text": "Mall dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we apply our SAVIGP inference method to the Gaussian process regression networks (GPRNs) model of , using the SARCOS dataset as a test bed.",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 129,
+                            "text": "SARCOS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this dataset, each sample represents a retweet labeled by t r \u2212 t 0 where t r and t 0 are when the retweet and the original tweet was posted, respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each topic of 25 documents from the AQUAINT corpus of English news is to be summarized into a collective abstract of at most 250 words.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 50,
+                            "text": "AQUAINT corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Much of this work has been applied to prediction tasks, where given a dataset D = {",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At last, we conduct comprehensive experiments using a combined dataset containing five caption styles: humorous, romantic, positive, negative and factual styles.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows a spatiotemporal dataset consisting of instances of several spatial events over different time, each event type represented by a distinct shape.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When a method is evaluated, we use the historical and training datasets to produce the recommendations and the test dataset as ground truth to evaluate the performance metrics of the recommendations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GNNs have recently been applied to many datasets, including sports trajectory data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the proposed approach with two state-of-the-art semi-supervised learning methods (traditional co-training  and mean-teacher ) on the BP4D database.",
+                    "annotation_spans": [
+                        {
+                            "start": 144,
+                            "end": 157,
+                            "text": "BP4D database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The precision and recall plots of the Biochemistry data set are illustrated in  respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 59,
+                            "text": "Biochemistry data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, k could be selected according to the amount of data in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After removing the punctuations, we collect 12, 593 word tokens for MSVD dataset and 27, 891 word tokens for MSR-VTT dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 80,
+                            "text": "MSVD dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 109,
+                            "end": 124,
+                            "text": "MSR-VTT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On a 3.4 GHz CPU, the fully collapsed Gibbs sampler of the BNBP topic model takes about 2.5 seconds per iteration on the NIPS12 corpus when the inferred number of topics is around 180.",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 134,
+                            "text": "NIPS12 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show twodimensional embeddings of training data points and classification boundaries for each label obtained by MLDA and BSML on Emotions",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The methodology uses the Reuters-21578 document collection, but could be applied for any pre-classified collection of documents.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 58,
+                            "text": "Reuters-21578 document collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A Gaussian processes is an infinite collection of random variables for which any finite subset are jointly Gaussian distributed .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Dataset: We used three large regression, YearPredictionMSD ,Slice , UJIIndoorLoc , and two NLP benchmarks, MRPC , RTE .",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 58,
+                            "text": "YearPredictionMSD",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 60,
+                            "end": 65,
+                            "text": "Slice",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 68,
+                            "end": 80,
+                            "text": "UJIIndoorLoc",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 107,
+                            "end": 111,
+                            "text": "MRPC",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 117,
+                            "text": "RTE",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Now consider RCV-Topics-Subsets dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 39,
+                            "text": "RCV-Topics-Subsets dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset contains the weekly influenzalike-illness statistics (patient counts) from 47 prefectures in Japan, ranging from 2009 to 2015.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Incorporating our module improves performance considerably (5%-10% relative) over baseline systems on both mini-ImageNet and tieredImageNet benchmarks, with overall performance competitive with recent state-of-the-art systems.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 150,
+                            "text": "ImageNet and tieredImageNet benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the recent work of Zoran and Weiss , we use 8-by-8-pixel patches of monochrome natural images, obtained from the BSDS300 dataset    gives examples).",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 138,
+                            "text": "BSDS300 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the centroid diameter \u2206 3 (S) requires knowledge of the dissimilarity function d(\u00b7, \u00b7) and not just the pairwise dissimilarity matrix between the objects in the original dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Without previous domain knowledge of a query and a collection, we assume that the query terms are identically distributed, i.e., the query terms have the same impact shape functions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recall from Section 3 that the AVSDv1.0 dataset contains 11,816 instances split across training (7,985), validation , and testing (1,968) correspond-ing to the source Charades video splits.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 47,
+                            "text": "AVSDv1.0 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this way, we can generate data from simulators and use such benchmark datasets for the evaluation of causal discovery algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset was chosen because it represents a simple, well-known example that is commonly used as a benchmark problem in evaluating clustering methods and is widely available, incorporated as a built-in data object in both the R and S-plus statistics packages.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We clearly observe the probability of a user repeatedly using the same opinion phrase to describe a particular feature is obviously higher in the original dataset than that in the permuted datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Enhanced by the novel corner pooling layer, CornerNet achieves superior performance on MS COCO object detection benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 121,
+                            "text": "MS COCO object detection benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2000 training examples as a validation set to choose the margin penalty parameter; after this parameter by cross-validation, we then retrained each SVM using all the training exam reference, we also report the best results obtained previously from three layer deep belief ne 3) and SVMs with RBF kernels (SVM-RBF).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For comparison of performance measures and model sizes, these datasets are exactly the same (except NEWS20) as used in state-of-the-art methods .",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 106,
+                            "text": "NEWS20",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In both Foursquare and Whrrl datasets, we find that when more factors are considered the performance turns out to be better.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 37,
+                            "text": "Foursquare and Whrrl datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We find the performance trend of the three algorithms is similar between PersonX and real-world datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 104,
+                            "text": "PersonX and real-world datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\u03b1)T H (\u03b1) \u22122 , and therefore need to know the variance of the gradient of F averaged across different equivalent datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, several real-world datasets from various application domains will be used to evaluate the efficiency of the proposed stochastic algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\"\"<song name>\" music review for each song in the data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Even on datasets for which fine-tuning weights pre-trained on ImageNet does not help significantly , e.g. Stanford Cars  and FGVC Aircraft , training with the ImageNet policy reduces test set error by 1.2% and 1.8%, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 159,
+                            "end": 167,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 62,
+                            "end": 70,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Without loss of generality, all our data sets will set !",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The intent of the Graph500 benchmark  is to rank computer systems by their capability for basic graph analysis just as the Top500 list  ranks systems by capability for floating-point numerical computation.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 36,
+                            "text": "Graph500 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They used only a small dataset and did not address question answering content in their work.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With the three distillation terms, the improvements for ResNet18 (0.5), ResNet18 (1.0) and ResNet18 (1.0) with weights pretrained from the ImageNet dataset are 6.26%, 5.74% and 2.9%, respectively, which indicates that the effect of distillation is more pronounced for the smaller student network and networks without initialization with the weight pre-trained from the ImageNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 139,
+                            "end": 155,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 369,
+                            "end": 377,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we stress the scalability of our learning procedure and study FSE on the whole MNIST dataset with 10 classes and 60, 000 training instances.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 109,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our goal is to predict all the missing attributes in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , MAP of BM25 changes over different b, and CRTER can boost BM25 under all b's settings and over all the collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unfortunately, networks trained in this manner either demand a very large collection of unique local patches or simply lack generalization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results obtained from one data set are often used to either corroborate or challenge results from another.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply Algorithm 4.1 to data streams generated from the Reuters Corpus RCV1-V2.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 80,
+                            "text": "Reuters Corpus RCV1-V2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "unpruned N -grams extracted from the english Europarl parallel corpus ; YahooV2",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 69,
+                            "text": "english Europarl parallel corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 72,
+                            "end": 79,
+                            "text": "YahooV2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, we trained the models on MR contrast dataset to learn the task of synthesizing the other contrasts.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 51,
+                            "text": "MR contrast dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Prior Network trained using the reverse KL-divergence, on the other hand, yields an estimate of total uncertainty which better reflects the structure of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The reason why we selected these data sets is that we concentrate our research on the detection of bursty change of behavioral patterns or data structures, rather than outlier detection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the pretrained word embeddings  of 50 dimensions trained on Wikipedia and AQUAINT corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 96,
+                            "text": "Wikipedia and AQUAINT corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, on the MSD spleen dataset, a challenging public dataset, EBP outperforms RSTN by more than 2%.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 35,
+                            "text": "MSD spleen dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Million Song Dataset (MSD)  is a collection of metadata and precomputed audio features for one million contemporary songs.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 30,
+                            "text": "Million Song Dataset (MSD)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Suppose that we are given two data sets Since we are interested mainly in noisy sensor data, the index n typically runs over discrete values of time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By fine tuning these different thresholds, we obtain  two refined datasets with decent amount of users and items, whose basic statistics are reported in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each of the 30 scenes, we capture images using these five different apertures, giving us a total of 150 images for the controlled dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since, in the case of binary data, our method reduces to a matrix-vector multiplication, we also came up with ten heuristic matrix-vector methods which we ran on the same queries, using the same datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It offered a statistically significant improvement over MNB, up to 11.04% and 12.56% in terms of overall accuracy, and up to 9.072% and 7.480% in terms of macro F 1, for the NPB and MLB data sets,",
+                    "annotation_spans": [
+                        {
+                            "start": 174,
+                            "end": 195,
+                            "text": "NPB and MLB data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results based on the TREC benchmarks showed that M 2 Div can signi cantly outperform the state-of-the-art baselines based on greedy sequential document selection, indicating the e ectiveness of the exploratory decision-making mechanism in M 2 Div.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 49,
+                            "text": "TREC benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, it took 50 annotators one month to clean the IMDB-Face dataset, which only contains 59K celebrities and 1.7M images.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 75,
+                            "text": "IMDB-Face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On this data set we also have the opportunity to validate the unsupervised clustering by taking into account the simultaneously recorded ephys-trace.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "OER Collection Indexation via Meta-Search.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 14,
+                            "text": "OER Collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Wiki dataset consists of 2,866 Wikipedia documents, each of which contains one text-image pair.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "Wiki dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we choose these parameters based on the corresponding validation set.  ) in green overlaying on the ground truth moving object mask in grey.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Complementary to ImageNet (mostly object-centric), we present here a scene-centric database, that we term the Places database.",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 125,
+                            "text": "Places database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 17,
+                            "end": 25,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To collect paired image and text collections for experiments, the names of these 10 categories are used as query keywords to crawl web pages from the Flickr web site and Wikipedia.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Gain obtained in the Computer and Science datasets is more when compared to the gain in Sports and Politics datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 50,
+                            "text": "Computer and Science datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, we also evaluate the merit of CAS on the CamVid dataset, which is a standard scene parsing dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 65,
+                            "text": "CamVid dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 4, we demonstrate the model on a dataset of multiple moving MNIST digits (Section 4.1) and compare it against AIR trained on each frame and Variational Recurrent Neural Network (VRNN) of  with convolutional architectures, and show the superior performance of SQAIR in terms of log marginal likelihood and interpretability of latent variables.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "provides the details of the datasets that are being used in this paper.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also observe the same pattern for the MAY dataset  .",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 52,
+                            "text": "MAY dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train our models, we use either GTA5  or SYNTHIA  as source domain synthetic data, along with the training split of Cityscapes dataset  as target domain data.",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 137,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 35,
+                            "end": 39,
+                            "text": "GTA5",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 44,
+                            "end": 51,
+                            "text": "SYNTHIA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We verify our analysis by experiments on real data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These procedures use one of 37 cluster quality measures to assess the influence of structure-destroying random permutations applied to the original dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We proceeded as follows: 50 courses with their reviews for each aspects (22 aspects) from the coursera dataset were extracted randomly.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 110,
+                            "text": "coursera dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Scene Flow datasets  contain stereo images in 960 \u00d7 540 pixel resolution with 35454 for training and 4370 for testing, and all image pairs are rendered from various synthetic sequences, i.e., FlyingTh-ings3D, Driving, and Monkaa.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "Scene Flow datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and We generate datasets with fixed D = 3000 and N ranging from 50 to 600.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Quote dataset  is a network of online media sites (e.g., news sites, blogs etc.).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "Quote dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, versioned document collections are usually stored using special differential (delta) compression techniques, and a number of researchers have recently studied how to exploit this redundancy to obtain more succinct full-text index structures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The official test set is split into a Public and Private set with a 30%/70% split.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We performed experiments on two sets of images: MNIST digits dataset and NORB object classification dataset 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 68,
+                            "text": "MNIST digits dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 107,
+                            "text": "NORB object classification dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The IMDb dataset had several types of nodes and relations.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "IMDb dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MNIST and USPS image datasets are both handwritten Digits datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 33,
+                            "text": "MNIST and USPS image datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides, there are usually estimation errors, including missing detection, especially on the images of the re-ID dataset which usually have low resolution and blurring artifacts.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 120,
+                            "text": "re-ID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2000 training examples as a validation set to choose the margin penalty parameter this parameter by cross-validation",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This phenomenon is more pronounced in the case of DBLP data set shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 63,
+                            "text": "DBLP data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Figures 1, 2, and 3 show the performance (in terms of MAP, P10 and Recall, respectively) for the four search engines on the FSupp Collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 140,
+                            "text": "FSupp Collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Taking the ISIC dataset for example, \u223c99% of images have more background than foreground pixels, and over 60% of images have less than 20% foreground pixels (see the blue bars in ).",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 23,
+                            "text": "ISIC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The goal for the experiments on ADNI database is to discriminate both MCI and AD patients from NC subjects, separately.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 45,
+                            "text": "ADNI database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Integrating regression labels with ranking information was proposed in  as a means to improve regression outcomes in label-imbalanced datasets, and similar approaches have been used to incorporate both \"pointwise\" and \"pairwise\" labels in image classification tasks .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, half entities in the EDR 2005 Chinese corpus do not contain any name mentions.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 57,
+                            "text": "EDR 2005 Chinese corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A wide selection of dataset visualizations is provided in the supplementary.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A convolutional dictionary with 128 elements which was trained on images from Berkeley dataset  is shown in figure 1.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 94,
+                            "text": "Berkeley dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Topic models, like PLSA  and LDA , provide a principled way to represent and analyze text collection by uncovering the hidden thematic structure of it auto-",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This so-called \"Million Musical Tweets Dataset\" 5 (MMTD) is an extension of the \"MusicMicro\" set , is freely available, and comprises almost 1,100,000 listening events of 215,000 users listening to a total of 134,000 unique songs by 25,000 unique artists.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 56,
+                            "text": "\"Million Musical Tweets Dataset\" 5 (MMTD)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 80,
+                            "end": 96,
+                            "text": "\"MusicMicro\" set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The office-31 dataset  is a standard benchmark dataset for domain adaptation, which contains 4, 110 images of 31 categories shared by three distinct domains: Amazon (A), Webcam (W) and DSLR (D).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "office-31 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our Digg data set contains a year of link descriptions, paired with the number of diggs each received during its first week on the homepage.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "Digg data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We built a stream dataset of keystroke dynamics based on CMU data .",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 65,
+                            "text": "CMU data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus there is a significant need for sampling negative items from the unseen samples in the collection 1 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to demonstrate the effectiveness of our proposed method, we further conduct experiments on another dataset called \"Breakfast\" , which is also widely-used for instructional video analysis.",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 134,
+                            "text": "\"Breakfast\"",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on benchmark datasets show that our method can significantly outperform state-of-the-art clustering approaches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although confidence scores were assigned to each extracted NE based on its presence in a Web corpus, relying on Captilisation will often miss many potential NEs in the query log that were not typed with capital letters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the purpose of comparison, we report NDCG figures instead of U on the Letor3.0 data set, and we report U on the Digg.com data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 91,
+                            "text": "Letor3.0 data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 116,
+                            "end": 133,
+                            "text": "Digg.com data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since UT Multiview dataset uses learning-by-synthesis approach for generating more eye images, the generated images are very different from images in MPIIGaze.",
+                    "annotation_spans": [
+                        {
+                            "start": 6,
+                            "end": 26,
+                            "text": "UT Multiview dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 150,
+                            "end": 158,
+                            "text": "MPIIGaze",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on UCI datasets support our theoretical analysis and give additional insight into the relationship between boosting and logistic regression.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 27,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TDT2 data set with k = 10, fourth row: 20 newsgroup data set with k = 5, fifth row: 20 newsgroup data set with k = 10.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 105,
+                            "text": "newsgroup data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 13,
+                            "text": "TDT2 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Notice that all the datasets are of a relative small size (hundreds of training samples) which means that adding more parameters (the consequence of adding the CNN modules) would hurt the performance due to overfitting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report the average PSNR numbers on each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Even though the tail label problem is not acute on these data sets, and SLEEC was restricted to a single learner, SLEEC's predictions could be significantly more accurate than all the other methods (except on Delicious where SLEEC was ranked second).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "P. Across datasets we can see quite similar values, depending on the number of clusters used.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since TAC dataset is in biomedical domain, many of the biomedical terms might be either outof-vocabulary or not captured in the correct context using general embeddings, therefore we also train biomedical embeddings (WE Bio )  .",
+                    "annotation_spans": [
+                        {
+                            "start": 6,
+                            "end": 17,
+                            "text": "TAC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We provide simple algorithms for its robust computation on triangle meshes as well as point clouds, and demonstrate its effectiveness on datasets addressing deformable matching of meshes as well as rigid point cloud registration;",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare our proposed fast spectral LDA algorithm with baseline spectral methods and collapsed Gibbs sampling (using GibbsLDA++  implementation) on two real-world datasets: Wikipedia and Enron.",
+                    "annotation_spans": [
+                        {
+                            "start": 175,
+                            "end": 194,
+                            "text": "Wikipedia and Enron",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This procedure was chosen because it is available in both the R and S-plus software packages, it has been described in reasonable detail , and it overcomes a number of the known limitations of the more popular k-means clustering procedure (e.g., its outlier sensitivity and its dependence on the original ordering of the objects in the dataset).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A graphical depiction of the collection process is shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We select postcards with texture ranging from medium to high for both background and reflection, and combine them pairwise in a manner that our dataset has a wide diversity of complex overlapping textures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By design, our datasets include many irrelevant \"distracters\" features, called \"probes\".",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To address this hypothesis, we conduct experiments by indexing TREC GOV2 corpus using Terrier 2 , applying Porter's English stemmer and removing standard stopwords.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 79,
+                            "text": "TREC GOV2 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the Yandex click log dataset provided by the Relevance Prediction Challenge and Workshop on Web Search Click Data (WSCD 2012).",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 35,
+                            "text": "Yandex click log dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Commonly used feature representations for either video classification or action recognition are I3Dbased features by Carreira et al. , extracted from an action recognition dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A  70.0% N/A 10-30s : The average pixel-wise classification accuracy and average computation times on the MSRC and Street-Scene datasets of the three variants of our approach with those of the state-of-the-art CRF-based methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 136,
+                            "text": "MSRC and Street-Scene datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For Spam Email datasets,  show the comparison results for each task, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "Spam Email datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On AP and Robust2004 datasets, the ranking learner works much better than it works on WSJ dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 29,
+                            "text": "AP and Robust2004 datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 97,
+                            "text": "WSJ dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare our approach with the leading players on the ScanNet (v2) benchmark, including SGPN  and a projected Mask R-CNN (PMRCNN) approach .",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 78,
+                            "text": "ScanNet (v2) benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As labels of test set are held out, we report results on the validation set, similar to all related works .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After the collection D * is generated, we merge D and D * to train SPMM again.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All reported results are averages over 5 folds (test sets).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To validate the robustness of our proposed method, we also tested the proposed method on the CVPR workshop CLIC validation dataset  with large and various resolutions up to about 2K. To test the performance of video compression, we use the widely used Video Trace Library (VTL) dataset , which includes 20 videos with the resolution of 352\u00d7288 and 8 test sequences with the resolution of 832 \u00d7 480 and 416 \u00d7 240, which are commonly used by video coding standardization group with rich texture scenes and motion scenes.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 130,
+                            "text": "CLIC validation dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 252,
+                            "end": 285,
+                            "text": "Video Trace Library (VTL) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We trained SFNNs to generate colorful images of common objects from the Amsterdam Library of Objects database , conditioned on the foreground masks.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 109,
+                            "text": "Amsterdam Library of Objects database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Scaling LD-MKL to truly large datasets can present a challenge because we make use of kernelized support-vector regression.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This happens because segments with the same role in the CNN collection may have two or more versions of labels (root-to-segment path), i.e., it is possible to exist two or more segment classes formed by segments with the same function in such collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Synthetic datasets are an attractive alternative given their scale and readily-available ground truth.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We now present face verification on the more challenging Labeled Faces in the Wild (LFW) benchmark, where our experiments will show that there is an advantage to balancing disciminability and robustness.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 98,
+                            "text": "Labeled Faces in the Wild (LFW) benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also evaluate it on real-world sparse datasets and demonstrate its usability in both network traffic analysis and discussion tracking.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "words frequent in the news corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 33,
+                            "text": "news corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The improvements demonstrate the superiority of regularizing the feature distributions between attribute dataset and re-id dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, on the speech and MIDI datasets, models with auto-regressive (lower-half) output distributions obtain a dramatic advantage over models with fully factorized output distributions (upper-half), achieving new SOTA results on three speech datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 40,
+                            "text": "speech and MIDI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For weak generalization training data from all users is used to learn the models, and average NDCG is computed on the withheld test data from same users (see Section 2 for details on NDCG calculation).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In a second, mixed scenario, we consider each dataset entirely, thus building a solution for a more heterogeneous object set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(Multi-Human Parsing) dataset, which contains 25,403 elaborately annotated images with 58 fine-grained semantic category labels.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 29,
+                            "text": "(Multi-Human Parsing) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a more challenging problem we collected a data set of over 450 character samples of the letters a, b and c.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this experiment, we evaluated the efficiency improvement of GRLSI and GNMF over RLSI and NMF on the Wikipedia dataset and the Web-I dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 129,
+                            "end": 142,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 103,
+                            "end": 121,
+                            "text": "Wikipedia dataset ",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments on an entity search test collection based on DBpedia confirm that learning to rank methods are as powerful for ranking entities as for ranking documents, and establish a new state-of-theart for accuracy on this benchmark dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We plot the comparison between sparse-basis and ransac in figures 2 and 4 for the two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments we show quantitative comparisons of different models, analyze model ablations and describe our state-of-theart results on the RGB-D dataset of Lai et al. .",
+                    "annotation_spans": [
+                        {
+                            "start": 145,
+                            "end": 158,
+                            "text": "RGB-D dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The most commonly studied benchmark dataset is Cylinder-Bell-Funnel, a synthetic dataset introduced in  and used by  and others.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From our study we conclude that new test collections are needed to catalyze research into a generally overlooked though important type of query.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, we perform extensive quantitative and qualitative analysis of this model and compare to prior work on two challenging datasets, namely basketball and soccer.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Empirical experiments demonstrate that our method outperforms state-of-the-art methods over the five benchmark datasets, including both traditional and deep network-based models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, in order to ensure effective data collection, it is important to design methods which can mine the data with a guarantee of privacy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We set the maximum number of iterations to be Max it er , and also use a function EarlyStop() to control early termination of the optimization process, taking the loss on the validation set as input.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, VQA datasets  suffer from inherent bias, which hinders learning true visual concepts from the datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 21,
+                            "text": "VQA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, in case of Caltech-5 and Oxford flowers datasets, the accuracies reported are the testset accuracies averaged over 10 such randomly sampled training and test datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 54,
+                            "text": "Caltech-5 and Oxford flowers datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our exhaustive empirical study on more than 30 synthetic and real-world data sets demonstrates that it is also beneficial for CUR-like decompositions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to the unbalanced nature of the DHA dataset, neither F1 nor accuracy are representative metrics for classification.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 47,
+                            "text": "DHA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The VOT-2018 public dataset is one of the most recent datasets for evaluating online modelfree single object trackers, and includes 60 public sequences with different challenging factors.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 27,
+                            "text": "VOT-2018 public dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "IS and R-precision values on the CUB dataset compared with the state of the art, and has a competitive performance on the COCO dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 44,
+                            "text": "CUB dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 134,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The algorithm for IDDTW begins by using the classic DTW on the first K candidates from the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The UQV100 collection supports some new investigations in query clustering by having so many query variations per backstory.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "UQV100 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "formalizes the VRD task and provides a dataset, while addressing the subject (or object) and predicate classification models separately.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The story is slightly different on the Weibo-Response data set, which is significantly more challenging than the Q-A task in that it relies more on the content of texts and is harder to be captured by bag-of-words.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 62,
+                            "text": "Weibo-Response data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Among these datasets, \"w3a\" is text data which is of relatively high dimension but very sparse.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 26,
+                            "text": "w3a",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Reuters dataset, we consider average Macro-F1 of its 10 classification tasks and for SRAA dataset, we only consider \"realauto-realaviation-simauto-simaviation\" classification task, as \"real-sim\" and \"auto-aviation\" classification task use the same set of labeled topics.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 23,
+                            "text": "Reuters dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 93,
+                            "end": 105,
+                            "text": "SRAA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The augmented dataset is divided into training and validation set by 4 : 1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe that (some version of) Condition (2) is a standard requirement in existing differentially private algorithms that preserve the privacy of the validation dataset while selecting a h \u2208 C that approximately maximizes q(h, V ), even if it is not required to maintain privacy with respect to the training data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the Figure we can see that the performance of McWL on both datasets increases as k increases, and it achieves stable performance when k is between 12 and 20.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is a full-day tutorial on building and validating test collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We found that a learning rate discrepancy with a factor of 100 when training using H3.6M data as the only source of 3D pose labels yields the best result when tested on the MPI-INF-3DHP dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 93,
+                            "text": "H3.6M data",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 173,
+                            "end": 193,
+                            "text": "MPI-INF-3DHP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It shows that marginal probability distributions of variables in both datasets are similar.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the concept hierarchy models (static and adapted versions) built from the Intranet collections of two academic institutions and compare them with a state-of-theart log-based query recommender, the Query Flow Graph, built from the same logs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "et al.'s MAN dataset  contains 19,320 blog posts.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 20,
+                            "text": "MAN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the previous methods are mainly tested on PETS2009, which only contains low/moderate crowd numbers on a walkway, here we test on a newly collected dataset comprising a real-world scene of a street intersection with large crowd numbers, vehicles, and occlusions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Only novel molecules (those not appearing in the ZINC training set) are included in the metric computation, to avoid rewarding memorization of the training data.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 66,
+                            "text": "ZINC training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The estimate can be however more precise if we exploit the knowledge of F 2 , i.e. the frequent 2-itemsets in D. During the second dataset scan, we can also compute F 2 , by using further M 2 counters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A test collection is composed of a collection of documents, a set of topics, and a set of relevance judgements.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extrinsic set (Set 2): is generated for cross-dataset validation, where it consists of 2,881 sRGB images rendered from four mobile phones (iPhone 7, Google Pixel, LG G4, and Galaxy S6 Edge) and one of the DSLR cameras (Olympus) from the NUS dataset that was excluded from Set 1.",
+                    "annotation_spans": [
+                        {
+                            "start": 237,
+                            "end": 248,
+                            "text": "NUS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We start by noting that, on the MNIST test set, an improvement of 0.1% is statistically significant .",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 46,
+                            "text": "MNIST test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "provides the C-index values obtained by various regression methods on the real-world high-dimensional micro-array cancer   datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Conducting the experiment on the manually downloaded IITB data set results in a disambiguation accuracy of 0.757 (micro-averaged).",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 66,
+                            "text": "IITB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This paper proposes a solution to the aforementioned problem that avoids the need for re-training neural architectures on large-scale datasets containing artistic images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data collection device provides pen trajectory information as a sequence of (x, y) coordinates at regular time intervals (10-15 ms).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A more thorough investigation of the performance of the algorithms for different values of k and for all datasets, refer to the full version of the paper .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used three standard TREC collections: AP (topic 51-150), WSJ (topic 51-150) and GOV2 (topic 701-800).",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 43,
+                            "text": "AP",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 60,
+                            "end": 63,
+                            "text": "WSJ",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 83,
+                            "end": 87,
+                            "text": "GOV2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Accompanying the dataset is a suite of new metrics that evaluate essential qualities such as consistency, grounding and plausibility.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We set the adaptor parameter a Word = 1 for the start nonterminal symbol Word, so we adapt the Word c , Stem c and Suffix c nonterminals for each hidden class c. Following , we used this grammar with six hidden classes c to segment 170,015 orthographic verb tokens from the Penn Wall Street Journal corpus, and set a = 0 and b = 500 for the adapted nonterminals.",
+                    "annotation_spans": [
+                        {
+                            "start": 274,
+                            "end": 305,
+                            "text": "Penn Wall Street Journal corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Good initial values of the coefficients and \u03c3 turn out to be vital for successful optimization, especially in a high-dimensional data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The detailed statistics of the three datasets are summarized in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Domain specific dataset contributions were very recently proposed with the \"CrackForest\" dataset , the CSSC database  and SDNET2018 .",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 116,
+                            "text": "CSSC database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 75,
+                            "end": 96,
+                            "text": "\"CrackForest\" dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 131,
+                            "text": "SDNET2018",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply the introduced method to the generation of organic molecules with arbitrary composition based on the QM9 dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 121,
+                            "text": "QM9 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 5 demonstrates our experimental results on several realworld datasets while Section 6 concludes our work.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following a similar setting as in , we randomly selected 100 questions from the labeled test set originated from Yahoo!",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset contains 1494 video clips covering 10 categories.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The FLIC dataset is comprised of 5003 images from Hollywood movies with actors in predominantly front-facing standing up poses (with 1016 images used for testing), while the extended-LSP dataset contains a wider variety of poses of athletes playing sport (",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "FLIC dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 174,
+                            "end": 194,
+                            "text": "extended-LSP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Generally, the graph based SSL methods  first model the whole dataset as a weighted undirected graph G =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Current studies use a broad range of notations and terminology, perform experiments using different and mostly proprietary datasets, do not detail the model inference procedures used and, thus, do not provide a general systematic view on the research area.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Cross-Subject, 40, 320 samples performed by 20 subjects are separated into training set, and the rest belong to test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to AC-GAN, our TAC-GAN successfully replicates the real data distributions on simulated data and significantly improves the quality and diversity of the class-conditional image generation on CIFAR100 , VGGFace2 , and ImageNet1000  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 200,
+                            "end": 248,
+                            "text": "CIFAR100 , VGGFace2 , and ImageNet1000  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Their model can be regarded as a semi-supervised PLSA model with specifications as pre-defined topics and concatenated reviews as documents, which maximizes the following log-likelihood function of the whole data set: where c(w, r p ) is a word count in concatenated reviews r p for p, and \u03c6 B is a background language model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the location dataset, we also present a selection strategy.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "location dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is because Eltwise9 learns the general distribution of the dataset, which is less related to the network architecture.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2 k\u22121 (X ), constrains that the input dataset X cannot be well clustered using k \u2212 1 instead of k clusters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We design the potential functions carefully from the climate datasets to ensure spatio-temporal consistency, i.e., the neighboring nodes in the 3-dimensional grid are encouraged to take the same value.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the direct and SVM ranking results to the human-annotated labels provided in the CAL-500 dataset and evaluate the rankings using two metrics: the area under the receiver operating characteristic (ROC) curve (denoted AUC) and the mean average precision (MAP) .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ICDAR2017-MLT  is a large scale multi-lingual text dataset, which includes 7200 training images, 1800 validation images and 9000 test images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "ICDAR2017-MLT",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This means that we can learn transition and emission probabilities directly from observations in our training data set of harmonisations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Transaction cost-adjusted wealth for the NYSE dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 55,
+                            "text": "NYSE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our first experiment simulated participation in the TREC 4 AdHoc Task, using the TREC 4 test collection consisting of 567,528 documents, 49 topics, and the official NIST gold standard of relevance .",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 58,
+                            "text": "TREC 4",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 81,
+                            "end": 103,
+                            "text": "TREC 4 test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparison over Embedding Approaches: For CIFAR-100 and ImageNet1K datasets, we have compared three embedding approaches: (1) deep boosting ;  traditional deep embedding ; (3) our deep embedding algorithm.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 75,
+                            "text": "CIFAR-100 and ImageNet1K datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the results on both data sets reveal an interesting trend: the multilayer arc co often perform better than their single layer counterparts.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, in the Appazaar dataset we have over 7000 items, which means that a sampling size of 200 could save over 50% of the buffer construction time, as illustrated in .",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 36,
+                            "text": "Appazaar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the standard settings of unsupervised domain adaptation, each category has labeled source dataset S =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Previous approaches build models on datasets which do not simulate the real-world data well (e.g., small  scale, insignificant variance, etc.).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We ran our experiments on the Chinese Treebank (and its English translation) , limiting the product of the sentence lengths of the two sentences to p \u00d7 q \u2264 130.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 46,
+                            "text": "Chinese Treebank",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this subsection, we compare the quality of selected features by NetFS and other baseline methods on the three above mentioned datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The title language model performs most similarly to CTR, with CTR only performing slightly better on the SJM dataset, for instance.",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 116,
+                            "text": "SJM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The advantage of the MSR dataset is that all data are in the linear color space where pixel measurements are proportional to the number of counted photons, and no postprocessing steps have been performed (e.g., sharpening, tone mapping) that will alter the image statistics.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 32,
+                            "text": "MSR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the publicly available 300 dimensional word vectors by , trained on part of the Google News dataset (\u223c100B words).",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 106,
+                            "text": "Google News dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the Netflix dataset, MudRecS achieves a RMSE score 8 8 MAE scores were not computed on the Netflix dataset due to their unavailability for the other 20 recommenders.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 22,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 94,
+                            "end": 109,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both query expansion experiments on four TREC collections and query classi cation experiments on the KDD Cup 2005",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 113,
+                            "text": "KDD Cup 2005",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Lastly, we note that our RMSE value on the NGSIM dataset is quite high, which we attribute to the fact that we used a much higher (2X) sampling rate for averaging than prior methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 56,
+                            "text": "NGSIM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Instead, we evaluate the effectiveness of our approach by investigating whether its automatically identified domains improve adaptation, that is, whether recognition accuracy on the target domains can be improved by reshaping the datasets into their latent source domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, the words on the TV data set changes dramatically.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 43,
+                            "text": "TV data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This methodology achieves robust domain agnostic models over computer vision collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train our models, we first create a static dataset of agent trajectories by generating training episodes based on shortest-paths from agent spawn locations to the best view of the object of interest.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These rules cover 97% of the training set and perform the correct case-role assignments on 84% of the 1307 sentences in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let O = {o 1 , \u00b7 \u00b7 \u00b7 , o n } be a dataset containing n objects in the R d feature space.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments conducted on publicly available datasets showed that the proposed algorithm improves over the state-ofthe-art with respect to both nDCG and nMCG.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the reconstruction accuracy (IoU) on the ShapeNet dataset using our method and that presented in   .403",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 63,
+                            "text": "ShapeNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To test our model's utility in intelligent web crawling, we conducted experiments on the WebKB data set using the greedy solution.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 103,
+                            "text": "WebKB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We investigate the following rewriting methods in our experiments: Synonyms: replace a word with its shortest synonym from Word-Net 3.0 1 , generating the correct full word form based on the CMU English morphology database (e.g. \"vehicle\" \u2192\"car\").",
+                    "annotation_spans": [
+                        {
+                            "start": 191,
+                            "end": 222,
+                            "text": "CMU English morphology database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The underlying data structure is split into shards according to document length, and each shard comprises of a collection of mapping matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The setting of the parameters shared by all datasets was as follows.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate major causal discovery algorithms with datasets generated from our simulator.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Active Data Repository (ADR)  is a runtime infrastructure that integrates storage, retrieval and processing of multi-dimensional datasets on a distributed memory parallel machine.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "use case addresses the needs of a test collection builder to help them control the assessments of the selected documents using the application as a dashboard.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recall of 3DSmoothNet on the 3DMatch data set remains high even when the inlier threshold ratio is increased.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 45,
+                            "text": "3DMatch data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results pertaining to real world datasets from the UCI Machine : Results for the online methods executed on a blood-cell scRNA dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We considered various standard synthetic-and realworld data sets, consisting of both dense and sparse matrices of various sizes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The rst is an instance from the DUC 2004 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 48,
+                            "text": "DUC 2004 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thanks to these datasets,  Code for all experiments hosted at https://github.com/matthewwicker/IterativeSalienceOcclusion Extended paper in  3D deep learning has enjoyed increased attention from machine learning practitioners.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These references are representative of th state-of-the-art for deep and shallow architectures on these data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This could be attributed to the fact that Netflix dataset is considerably larger and more sparse.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 57,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Collection of TREC-like document relevance judgments by third party raters (that are commonly used in other information retrieval tasks) such as LETOR data set  are difficult to obtain due to privacy restrictions.",
+                    "annotation_spans": [
+                        {
+                            "start": 145,
+                            "end": 159,
+                            "text": "LETOR data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, until now this method has only been tested on small dataset like MNIST and CIFAR10",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 91,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 74,
+                            "end": 79,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both Movielens10m and Netflix datasets, we consider the top n = 200 users and the top m = 500 movies.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 42,
+                            "text": "Movielens10m and Netflix datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The significant subgraphs from related source datasets are utilized to improve pattern mining in the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since we focus on implicit user feedback signals, it would be interesting to explore the possibility of having manual annotators or crowdsourcing workers to label the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The recognition tests are performed over four regular scene text datasets IC-DAR2013 , ICDAR2015 , SVT ,",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 84,
+                            "text": "IC-DAR2013",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 87,
+                            "end": 96,
+                            "text": "ICDAR2015",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 99,
+                            "end": 102,
+                            "text": "SVT",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our quantitative results on the benchmark datasets show that the structure recovered by our approach is consistent with classes defined by human labelers better than or at the level of other clustering approaches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Loc ation-Aware Topic Model Concept Sequence Generation Venue Topic Distributions Venue Relevance Matching Music Concept Detection Venue-labeled Music Collection Music Dataset Concept-labeled Music Collection VenueMusic Server Train Mobile Device Song Topic Distributions Playlist Song Topics SYSTEM OVERVIEW.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both approaches yield significantly better results than either a token-based or a one-best translation baseline on standard test collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "1 compares the performances of our network and the most recent HPE methods on the MPII testing set.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 98,
+                            "text": "MPII testing set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the target dataset is small and the number of parameters is large, fine-tuning may result in overfitting, so the features are often left frozen.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our tests conducted on publicly available LtR datasets confirm unprecedented speedups (up to 6.5x) over the best state-of-the-art competitor.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 54,
+                            "text": "LtR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the WebBase dataset, 2000 target pages are selected from a 2005 crawl.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 23,
+                            "text": "WebBase dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments regarding different aspects of STA demonstrate that STA outperforms state-of-the-art models on open set domain adaptation datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Application Datasets: We compare our model to baselines averaged over 5 runs with different seeds in natural language understanding and computer vision applications and note the relative increase in number of params for each method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The running time of Na\u00efve Greedy (NG) on this dataset exceeds 10 days.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For larger test set, we could choose larger k.  Iterative.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For internal linguistic features, we count the frequencies of all relation triplets in the training set, and transform these frequencies into probability distributions according to the subject's and object's categories, based on Naive Bayes with Laplace smoothing .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This behavior remains similar for the CIFAR-10 dataset, where Sparse-Fool computes on average, perturbations of 2.4x higher sparsity, and is 15.5x faster.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 54,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Based on the Appazaar dataset, the recommendation performance of TFMAP and all the baselines except FM is shown in , from which we obtain three observations.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 29,
+                            "text": "Appazaar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second collection, TREC123, was created by combining existing TREC collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 82,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 23,
+                            "end": 30,
+                            "text": "TREC123",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we compare our BiC method with the state-of-the-art methods on two large datasets (ImageNet-1000 and Celeb-10000).",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 113,
+                            "text": "ImageNet-1000",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 118,
+                            "end": 129,
+                            "text": "Celeb-10000",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Because the confounder in this dataset is known explicitly, we tested the ability of Kronecker GLASSO to recover it from observational data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover,  shows that the distribution on top 200 word n-grams in terms of max c{\u03c7 2 c } contains word bigrams and trigrams (37.6%, 52.0%, 28.1% for the NPB, TV, and MLB data sets).",
+                    "annotation_spans": [
+                        {
+                            "start": 153,
+                            "end": 179,
+                            "text": "NPB, TV, and MLB data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our approach outperforms a one-class SVM on all four datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Subsequently, four other successful and influential datasets followed which are as follows: COCO-QA , VQA , FM-IQA , Vi-sual7w .",
+                    "annotation_spans": [
+                        {
+                            "start": 92,
+                            "end": 99,
+                            "text": "COCO-QA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 102,
+                            "end": 105,
+                            "text": "VQA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 108,
+                            "end": 114,
+                            "text": "FM-IQA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 117,
+                            "end": 126,
+                            "text": "Vi-sual7w",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "depend on n, which could be high for large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For one, a substantial fraction of web pages in the ClueWeb09 collection no longer exists in the wild; and often enough, the domain of the page either no longer exists or has been taken over by a \"domain parking\" service.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 72,
+                            "text": "ClueWeb09 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The lack of test collections containing ambiguous queries is highlighted and a method for creating collections from existing resources is described.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generate a new large-scale synthetic dataset with objects manipulated by hands: ObMan (Object Manipulation).",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 88,
+                            "text": "ObMan",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The right panels of figures 2 and 3 show the test set error rates from arc cosine kernels",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In BCISO training sets, only a few outliers are used as normal instances, which supports our idea of discarding some instances in the interval after the labeling set.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 22,
+                            "text": "BCISO training sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since it is hard for human to label searcher satisfaction for such a big dataset, we randomly sampled it to generate a smaller dataset consisting of 614 clicked questions following 457 queries issued by at least two users.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the benefits of our model over state-of-the-art trajectory prediction methods on standard datasets and on a novel dense traffic dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where k is the actual size of the training data set used to perform approximate empirical risk minimization in the extremes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since infrequent items will not contribute to the collection of frequent closed itemsets, we can remove them without affecting the correctness of the algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second criterion is in principle more useful since it uses additional data to assess the association, but it is of little value if the background corpus is small or if the words or the word pair does not occur in the corpus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the whole relation network is completely different from  due to different tasks, it is clear that our relation module is more effective than the relation model used in , as we have consistent improvement (e.g., 92.0% vs 90.0%, respectively, on the MS Dataset).",
+                    "annotation_spans": [
+                        {
+                            "start": 254,
+                            "end": 264,
+                            "text": "MS Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The right panels of figures 2 and 3 show the test set error rates from arc cosine",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Samaneh is a senior member of SFU Database and Data mining lab under supervision of Dr. Martin Ester.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 42,
+                            "text": "SFU Database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast to , which reports retrieval results with LDA smoothed LM (LDA-LM) on individual document subsets (and their corresponding relevance judgements) from the TREC collection as categorized by their sources, i.e. the \"LA Times\" and the \"Financial Times\", we instead executed LDA on the whole TREC collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 166,
+                            "end": 181,
+                            "text": "TREC collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 299,
+                            "end": 314,
+                            "text": "TREC collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train our model, we use the AVSpeech dataset , comprised of millions of video segments from YouTube with more than 100,000 different people speaking.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 47,
+                            "text": "AVSpeech dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Suppose now that the designer selects the first experiment S 1 uniformly at random from the collection of subsets of cardinality 1",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to evaluate the actual complexities we have computed, for the K ODD\u2212ST h kernel, the number of nodes in each BigDAG on the NCI1 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 132,
+                            "end": 144,
+                            "text": "NCI1 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The TREC 2011 and 2012 diversity test collections have graded relevance assessments; all TREC diversity test collections (2009-2012) have the informational and navigational subtopic tags.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 49,
+                            "text": "TREC 2011 and 2012 diversity test collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 132,
+                            "text": "TREC diversity test collections (2009-2012)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our model achieves state-of-the-art results on the existing TVHID-LAEO video dataset, significantly outperforming previous approaches.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 84,
+                            "text": "TVHID-LAEO video dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the mnist-back-rand data set, the best MKM used an n = 1 arc-cosine kernel and 300-90-105-136-126-240 features at each layer.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 32,
+                            "text": "mnist-back-rand data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For image retrieval, our method outperforms recent state-of-the-art techniques on the NUS-WIDE animal dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 109,
+                            "text": "NUS-WIDE animal dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to this table, modifying the loglogistic method using each of the proximity functions improves the retrieval performance, in all collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, better results are obtained when a larger dataset with high degree of intra-class variation is used as the reference dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that our deep recursive neural networks outperform shallow recursive nets of the same size in the fine-grained sentiment prediction task on the Stanford Sentiment Treebank.",
+                    "annotation_spans": [
+                        {
+                            "start": 152,
+                            "end": 179,
+                            "text": "Stanford Sentiment Treebank",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "datasets, and the other one on the Paleo dataset for identifying the monsoon climate patterns in Asia.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 48,
+                            "text": "Paleo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "use case addresses the needs of researchers when (a) interested in checking the properties of a test collection, e.g. visualize the pooled runs, assess the behavior of each topic, bias of the nonpooled or new systems, or (b) interested in juxtaposing two or more test collections to compare their properties.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, GA-Fast-RCNN, GA-Faster-RCNN and GA-RetinaNet improve overall mAP by 2.2%, 2.7% and 1.2% respectively on COCO dataset over their baselines with sliding window anchoring.",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 131,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we will illustrate our approach on both synthetic and real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our evaluation is done on the ClueWeb09 Category B retrieval collection 2 , which is also used in both WT-2009 and WT-2010.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 71,
+                            "text": "ClueWeb09 Category B retrieval collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 103,
+                            "end": 110,
+                            "text": "WT-2009",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 115,
+                            "end": 122,
+                            "text": "WT-2010",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The topicality T P (t) of a term t is estimated as its contribution to the KL divergence between this relevance model and the language model for the entire retrieval collection: It is equivalently t's contribution to the clarity score of the query q .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Based on the embedding feature for each training video, we train an additional threelayer fully connected network with a classification loss to get the customised feature descriptor on the iQIYI-VID dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 189,
+                            "end": 206,
+                            "text": "iQIYI-VID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show the test set error rates from arc-cosine kernels of varying degree (n) and levels of recursion ( ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "o-vMF substantially outperforms o-LDA and o-EDCM for all datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our results indicate that, as expected, the Clustered Dataset results in better-trained topic models than the Single-Tweet Dataset, regardless of the number of topics.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 61,
+                            "text": "Clustered Dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 110,
+                            "end": 130,
+                            "text": "Single-Tweet Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments, however, indicate that with the amounts of noise prevalent in existing datasets, and by removing collinear triplets, our solutions always satisfy these constraints to a good numerical precision.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent work revisits the layered model to handle occlusions , but does not explicitly model the layer ordering or achieve state-of-the-art performance on the Middlebury benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 158,
+                            "end": 178,
+                            "text": "Middlebury benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Robust04 and Gov2 collections, a standard Porter stemmer is used.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 37,
+                            "text": "Robust04 and Gov2 collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "search query vectors constructed by summing word vectors for word tokens in a query, trained using Dcontent data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 116,
+                            "text": "Dcontent data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Frames with low and high memorability scores coming from 4 different videos of the memorability dataset .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each member in this partition corresponds to some parental state u, and in general, it is a union of a collection of non-intersecting half-open or closed time intervals that are subsets of [0, T ].",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the document vector representation still suffers from the extreme sparsity problem in short text data, because of the shortness of short texts limits each term occurs in a relative small part of documents in a collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recently, a dataset of music listening activities inferred from microblogs has been released .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For stacked networks, significant improvements (over 1.2dB)   are achieved on the GoPro dataset by increasing the network depth.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 95,
+                            "text": "GoPro dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each data set was randomly split into a training set S of |S| examples and a testing set T of |T | examples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, scholars, organizations and institutions commonly share conference slides, video lectures, tutorials, Wikipedia pages, datasets and source codes generated from research.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Nevertheless, there are a great many large collections and for search engines retrieving from such corpora, it would appear important for them to deal effectively with ambiguous queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we present a data collection solution along with a data synthesis technique to simulate digital medium-based face spoofing attacks, which can easily help us obtain a large amount of training data well reflecting the real-world scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments on the challenging PASCAL VOC dataset, we find the inclusion of our discriminative, automatically detected configurations to outperform all existing state-of-the-art methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 56,
+                            "text": "PASCAL VOC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, it mines a collection of frequent itemsets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first one is the Graph-Based Visual Saliency (GBVS) model proposed in , which is one of the best performers in the evaluation benchmark of MIT300 3 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train R-MVSNet on the DTU dataset , which contains over 100 scans taken under 7 different lighting conditions and fixed camera trajectories.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 36,
+                            "text": "DTU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply this mapping to the entire dataset before doing regression.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Mix data set contains seven clusters with arbitrary shapes.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "Mix data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We chose these two biomedical domain ontologies because they are in the same domain as the articles in the TAC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 118,
+                            "text": "TAC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Noting the lack of HDR panoramas, they leveraged the same LDR panorama dataset  to regress first from the input image to a LDR panorama and light source locations and then refined the model for light source intensities with 2100 new, captured HDR panoramas.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 78,
+                            "text": "LDR panorama dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on the Hopkins 155 benchmark dataset  show that our method is superior to other approaches in terms of the qualities described above, including computational efficiency.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 48,
+                            "text": "Hopkins 155 benchmark dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Suppose we have two partitions D 1 and D 2 , and the two collection, C 1 and C 2 , of the closed itemsets mined from them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When working with Legal Track test collections we need to think a bit differently about reliability and reusability.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 46,
+                            "text": "Legal Track test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the replicated blockF l , which is initialized from F l , can be optimized towards the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "More specifically, deleting the simulated data of the pathophysiology diagnosis and the pattern diagnosis variables leads to confounding in the dataset because they have at least two direct effects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, given a projected dataset D h = D [h,h+1) , only h and those items x,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As example, we show the topics generated by GRLSI and GNMF in the setting of (K s = 20, K c = 8(10), \u03bb 1 = 0.01, \u03bb 2 = 0.1) for both Wikipedia and Web-I.  and  present example topics randomly selected from the topics discovered by GRLSI and GNMF on Wikipedia and Web-I. For each of the datasets and each of the methods, 3 shared topics and 9 class-specific topics are presented.",
+                    "annotation_spans": [
+                        {
+                            "start": 133,
+                            "end": 152,
+                            "text": "Wikipedia and Web-I",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 249,
+                            "end": 268,
+                            "text": "Wikipedia and Web-I",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on three benchmark datasets, TACoS, Charades-STA and DiDeMo, show that our method achieves the state-of-the-art performance with a high detection speed, demonstrating both effectiveness and efficiency of our method.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 56,
+                            "text": "TACoS",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 58,
+                            "end": 70,
+                            "text": "Charades-STA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 75,
+                            "end": 81,
+                            "text": "DiDeMo",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments show that the proposed method achieves the best performance on an extensive 3D dataset compared with the state-of-the-arts in terms of mean angular error and computational complexity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our evaluation, we used several publicly available high-dimensional gene expression cancer survival benchmark datasets 1 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Looking at the mixture weights, which may reflect the importance of each model, we observed that the best settings in all the collections vary in the following ranges: 0.1\u2264\u03b1 0 \u22640.2, 0.1\u2264\u03b1 Dom \u22640.2, 0.1\u2264\u03b1 K \u22640.2 and 0.5\u2264\u03b1",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Much of the recent success in these areas is due to large neural networks trained on massive human-annotated datasets collected from the web.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test a Gaussian model with a Normal Inverse-Wishart prior on the MNIST dataset  by first running PCA on the 70,000 training and test images to 50 dimensions.",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 81,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose to automatically disambiguate query terms by employing noun phrases that are extracted using the global analysis of the patent collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, there have been a slew of recent papers on reducing annotator effort in producing test collections: Cormack et al. , Zobel , Sanderson and Joho , Carterette et al. , and Aslam et al. , among others.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On small datasets, features extracted from random convolutional networks can work just as well as trained networks .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experimented using the Boston housing dataset which consists of N = 455 training data points in d = 13 dimensions.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 48,
+                            "text": "Boston housing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Classes are hashtags on baseball teams for the NPB and MLB data sets, and hashtags on television networks for the TV data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 68,
+                            "text": "NPB and MLB data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 125,
+                            "text": "TV data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is well known that the categories in the Reuters corpus are defined by a small set of well selected features, whereas 20 NG needs many more features  which explains why 20NG needs a greater feedback quota.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 58,
+                            "text": "Reuters corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 121,
+                            "end": 126,
+                            "text": "20 NG",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 172,
+                            "end": 176,
+                            "text": "20NG",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, we follow  and include the external unpaired text data (i.e., British National Corpus and Wikipedia) for training our LSTM-P.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 95,
+                            "text": "British National Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We developed a phone recognizer for English units using the BUT architecture and automatically generated STT transcripts on the Switchboard 2 Cell corpora .",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 154,
+                            "text": "Switchboard 2 Cell corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It can mine its trees either from a candidate itemset collection, or directly from the data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "How well do existing test collections support research in ambiguous queries?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments show that the proposed approach outperforms the state of the art on the LINEMOD, Occlusion LINEMOD and YCB-Video datasets by a large margin, while being efficient for real-time pose estimation.",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 133,
+                            "text": "LINEMOD, Occlusion LINEMOD and YCB-Video datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "from a collection of probability distributions P = {P 1 , P 2 , . . . }, respectively, where distribution P i+1 may depend upon the results of experiments 1, 2 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Fingerhut's existing modeling approach, the large data set was sub-sampled, and preliminary feature selection and data transformations were performed to obtain a reduced data set for which the relevant statistical computations are tractable.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides, similar results can be seen on the Amazon dataset, which are shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 58,
+                            "text": "Amazon dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, although adversarial training leads to certified robustness on training set (due to the design of the objective function ), the performance usually drops significantly on the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the first data set, known as rectangles-image, each image contains an occluding rectangle, and the task is to determine whether the width of the rectangle exceeds its height; examples are shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In most cases, the result collection was a combination of informative resources and noisy results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "RankSVM is still a very competitive performer when considering average results across all collections in the LETOR 3.0 L2R benchmark 7 , besides being readily available for experimentation, unlike other methods, which are proprietary.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 134,
+                            "text": "LETOR 3.0 L2R benchmark 7",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We fine-tune either the whole network or only fc6-8 layers on trainval set and evaluate on test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to illustrate the efficiency of the proposed algorithm, we conducted run-time experiments on three benchmark data sets for sequential data: network connection payloads from the DARPA 1999 IDS evaluation , news articles from the Reuters-21578 data set  and DNA sequences from the human genome .",
+                    "annotation_spans": [
+                        {
+                            "start": 237,
+                            "end": 259,
+                            "text": "Reuters-21578 data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 186,
+                            "end": 200,
+                            "text": "DARPA 1999 IDS",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also comparing the densities of the datasets we can see that the Movie-Lens dataset have significantly higher density than other dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 83,
+                            "text": "Movie-Lens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Target entities were chosen such that they receive a moderate number of mentions in the stream corpus: between once per day and once per week.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 101,
+                            "text": "stream corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For ADE dataset, we use 20,197 images for training and 1,000 images for testing with 445 categories, following .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "ADE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These trees result from applying the linkage functions SING, COMP, and MED to the MNIST dataset (first 10000 samples).",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 95,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This model has been chosen for the Graph500 benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 53,
+                            "text": "Graph500 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments, for e ective training of the model parameters and following the practices in , we combined four TREC datasets and constructed a new dataset with 200 queries and in total about 45,000 labeled documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we show the comparisons on the EVALUA-TION set of the Middlebury benchmark , which are also available on the Middlebury website.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 79,
+                            "text": "Middlebury benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We postulate that our method will perform better in situations where the similarity measure is highly indefinite as in the USPS dataset, while measures that are almost positive semidefinite maybe be seen as having a small amount of noise.",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 135,
+                            "text": "USPS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "presents further results for all four considered data sets for varying choices of parameters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our methods on the TREC GOV2 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 51,
+                            "text": "TREC GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The NBA dataset contains videos of basketball players attempting 3-point shots in 16 basketball games.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "NBA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When too few validation points (< 10 positives or negatives) reach a node n, we revert to computing statistics over the full validation set D V rather than D V (n).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compared a number of different metrics for scoring correlation anomalies using a real-world automotive sensor data set, and showed that the proposed conditional KL divergence metric significantly improves the performance over existing metrics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The INEX dataset contains English Wikipedia articles which were flattened by removing all XML markups.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "INEX dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To address the lack of real-world data in 3D pose estimation, we apply our bundle adjustment framework to \"in the wild\" clips from the Kinetics dataset  comprised of YouTube videos, and show how we can leverage our predictions on real-world videos as a source of weak supervision to improve existing 3D pose estimation models.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 151,
+                            "text": "Kinetics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted our experiments using the MNIST dataset, which contains handwritten digits from the 10 digit classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 52,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In many cases such as demographic data sets, only partially aggregated data sets are available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since some categories are more frequent than others on YA, it is also interesting to inspect the frequency of the category in the collection relative to its general YA frequency.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While RGB datasets are understandably popular for 'internet vision', it is worth stepping back and askingwhy must an embodied agent navigating in 3D environments be handicapped to perceive with a single RGB camera?",
+                    "annotation_spans": [
+                        {
+                            "start": 6,
+                            "end": 18,
+                            "text": "RGB datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "LFA uses skills from Learning Factors Analysis, a semi-automated technique that refines expert-provided skills ; LFA skills are available for only the Fractions and Geometry datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 151,
+                            "end": 182,
+                            "text": "Fractions and Geometry datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To assess the quality of our ESA index, we apply it to compute word relatedness on the widelyaccepted WS-353 benchmark dataset , which contains 353 word pairs, and our experiments show a Spearman's rank correlation of 0.735, which is consistent to the previously reported numbers .",
+                    "annotation_spans": [
+                        {
+                            "start": 102,
+                            "end": 126,
+                            "text": "WS-353 benchmark dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Many tag recommendation strategies, and in particular the ones proposed here, exploit co-occurrence patterns by mining relations among tags assigned to the same object in an object collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset contains 20 3D CT scans with a resolution varied from 0.25 mm to 0.35 mm (12 for training and 8 for testing).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generate missing values in the dataset according to the definition in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In summary, the proposed TMSA combines the topic modeling and word embedding components via a mutual learning mechanism and achieves the best performance on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Blogs08 corpus and the news headline corpus from the New York Times (NYT)  were used for experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "Blogs08 corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 27,
+                            "end": 47,
+                            "text": "news headline corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For training data generation, we use the ShapeNet model database , and train on a subset of 8 classes (see Sec. 6).",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 64,
+                            "text": "ShapeNet model database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where the parameters 01 and  according to the validation set perfonnance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This can potentially explain the poor performance of this approach on the MSD dataset where challenge organizers found that it was unable to outperform the simple songs by the same artist baseline .",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 85,
+                            "text": "MSD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A large surface form dictionary is obtained from the FACC collection, which contains Freebase annotations for the ClueWeb09 and ClueWeb12 datasets .",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 68,
+                            "text": "FACC collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 146,
+                            "text": "ClueWeb09 and ClueWeb12 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our testing dataset includes data with incomplete teeth.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focus on the restriction of the Adult dataset  to its eight categorical attributes, as done in , and evaluate our approximations using average error within a cuboid, also as in .",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 48,
+                            "text": "Adult dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply our approach on a web host graph to detect web spammers, and on the IMDB dataset to detect unusual actor groups.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 89,
+                            "text": "IMDB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection and the ground truth are publicly available 2 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "query a large synthetic dataset of rendered hands interacting with objects to retrieve configurations that match the visual evidence.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the case of the car side data set, the ten-part model shows a significant improvement compared to the five parts model while for the car rear data set the performance improvement obtained by increasing the number of parts is not as significant.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 36,
+                            "text": "car side data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We next report the average computational time for a single fold with the optimal parameters on CAS, since it is the largest dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Topic variability: Most IR test collections have only one instantiation of a topic (an exception is the TREC Query track).",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 120,
+                            "text": "TREC Query track",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our proposed method has consistently excelled the state-of-the-art method by 0.091 mAP@all on the Sketchy dataset and 0.074 mAP@all on the TU-Berlin dataset, which shows the effectiveness of our proposed SEM-PCYC model which gets benefited from (1) cycle consistency between sketch, image and semantic space, (2) compact and selected side information.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 113,
+                            "text": "Sketchy dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 139,
+                            "end": 156,
+                            "text": "TU-Berlin dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The initial corpus of features is clustered into k top-level groups, where group membership is determined by the Voronoi partitioning of the feature corpus according to the k cluster centers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In a cross-modal retrieval dataset, this means that we believe there is no bias towards having observed certain positive matches over others; whether this is justified depends on the nature of the data collection process.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In other words, we can choose between spending more time on constructing our collections, while reducing training time, or take a naive collection construction approach that results in larger collections and thus longer training times.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We construct a new large-scale fisheye dataset to train networks and to evaluate the effectiveness and efficiency of fisheye image rectification methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generate samples at novel combinations by sampling from the summation of energy functions (we first finetune the summation energy to generate both training datasets using a KL term defined in the appendix).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This means that not only we have to discriminate between false and true positives (local and global frequent patterns), but also between false and true negatives, i.e., globally closed itemsets that result not to be closed in some of the partitions of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The distribution of document frequencies of terms in the collection follows a power law distribution with slope 2.38 (upper curve in ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section we present COIN, a video-based dataset which covers an extensive range of everyday tasks with explicit steps.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, as a sanity check, we explored performance when features are missing at random, in a series of five standard UCI benchmarks, and also in a large digit recognition task using MNIST data.",
+                    "annotation_spans": [
+                        {
+                            "start": 181,
+                            "end": 191,
+                            "text": "MNIST data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The synthetic control data set from the UCI machine      learning repository  contains 600 examples of control charts synthetically generated by the process.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 76,
+                            "text": "UCI machine      learning repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The test topics associated to each collection are randomly split into ten equal subsets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In other words, we may need not only a large scale dataset, but also a dataset with rich saliency categories to ensure generalization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recently, Vapnik  and others -in their work on SVMs -have rediscovered that it may be advantageous to learn in the dual representation (see [1]), i.e. expanding the weight vector in terms of the training data m m i=l",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, Automatic Frankensteining is able to demonstrate for a further data set that it delivers the better predictions than auto-sklearn.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Training, development, and test sets were created from a 80-10-10 split.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They use the same pipeline that first either uses offthe-shelf detectors  or detectors finetuned with relationship datasets  to detect entities, then predicts the predicate using proposed methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We looked at the Isolet dataset (26 classes, 5,200 data points) from the UCI repository and the MNIST datasets of handwritten digits (10 classes, 5,000 data points).",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 31,
+                            "text": "Isolet dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 110,
+                            "text": "MNIST datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 87,
+                            "text": "UCI repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Of all three methods, the Adaptive method has the best overall precision, especially so for the LGL and CLUST datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 118,
+                            "text": "LGL and CLUST datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, extensive experiments on standard TREC collections have been conducted to evaluate our proposed feedback model from different aspects.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 59,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we evaluate BubbleNets annotation frame selection on multiple VOS datasets and achieve as much as an 11% relative improvement in combined Jaccard measure and region contour accuracy (J +F) over the same segmentation method given first-frame annotations.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 83,
+                            "text": "VOS datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are five views (Camera 0, 1, \u00b7 \u00b7 \u00b7 , 4) of eleven actions in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let N S ( y i ) be the collection of topm nearest neighbors of an embedding vector",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, extensive experiments are conducted on three realworld datasets to demonstrate the e ectiveness of our proposed models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our approach on the Breakfast  and YouTube Instructions datasets , following the evaluation protocols used in .",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 76,
+                            "text": "Breakfast  and YouTube Instructions datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second is the Corel-B dataset, which is divided into 175 training images and 130 testing images randomly.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 33,
+                            "text": "Corel-B dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Features such as document frequency (DF), collection frequency (CF) and Google frequency (GF) receive, as expected, negative weights in most cases.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments, three CLIR test collections in NTCIR3, 4, and 5  were used to build datasets for MLIR merging.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, we build another baseline model that we train the entity grounding module only through weakly supervised learning over a ResNet101 backbone, which is pretrained over ImageNet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 172,
+                            "end": 188,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Learning filtering models purely through supervised labels or rewards can be impractical, requiring massive collections of labeled data or significant efforts at reward shaping.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From  we can see that for optimal \u03b7 and \u03bb values and varying values of \u03ba, ORASD returns more wealth than all the other competing algorithms for the NYSE dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 148,
+                            "end": 160,
+                            "text": "NYSE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe a multi-level optimization approach for finding good partitionings that scales to very large data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The selected datasets include the PASCAL-Part Dataset , the CUB200-2011 dataset , and the ILSVR-C 2013 DET Animal-Part dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 53,
+                            "text": "PASCAL-Part Dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 60,
+                            "end": 79,
+                            "text": "CUB200-2011 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 90,
+                            "end": 126,
+                            "text": "ILSVR-C 2013 DET Animal-Part dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We considered the \"Bikes\" sub-class from the Caltech 256 dataset, which contains multiple images of common objects with varying backdrops, and chose to match images in the \"touring bike\" class.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 64,
+                            "text": "Caltech 256 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Enron dataset with 0.5M records, the method with pruning can reduce the time from 30 ms to 17 ms.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 21,
+                            "text": "Enron dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the Category B of the ClueWeb09 collection and CW09A is its Category A English part.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 42,
+                            "text": "ClueWeb09 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All COCO annotated objects are included in the batch, the ones which are specified by our dataset as being preferred for a task are considered as positive examples for that task and the others are considered as negative.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since 253 of the sample images from the original 3987 FLIC training set came from the same scene as a test set sample (and were therefore removed by the above procedure), we added these images back so that the FLIC-plus training set is a superset of the original FLIC training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 263,
+                            "end": 280,
+                            "text": "FLIC training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 49,
+                            "end": 71,
+                            "text": "3987 FLIC training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 210,
+                            "end": 228,
+                            "text": "FLIC-plus training",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition to the qualitative analysis, we evaluate our models quantitatively on the CONLL 2002 Spanish and Dutch datasets, and the CONLL 2003 English dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 123,
+                            "text": "CONLL 2002 Spanish and Dutch datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 133,
+                            "end": 159,
+                            "text": "CONLL 2003 English dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The classic study on the extent of ambiguity in test collections from Krovetz acknowledged the dictionary he used did not cover proper nouns.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and CIFAR100 are two typical datasets for knowledge distillation evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 12,
+                            "text": "CIFAR100",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(Extended YaleB  and AR ), one object categorization database (Caltech101) , and one action recognition database (UCF 50 action ).",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 73,
+                            "text": "Caltech101",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 127,
+                            "text": "UCF 50 action",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All the users in our dataset have all demographic attributes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "which organizes 320 GB of neural network descriptors extracted from a collection of 20 million images; given a query image, the memory part of the index is able to identify a relatively small candidate set of descriptors that is loaded from the disk and refined to obtain the final result of the similarity query.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, SUNCG  has been shown to improve single-view surface normal estimation on natural indoor images from the NYU Depth dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 120,
+                            "end": 137,
+                            "text": "NYU Depth dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "four other real world datasets from the UCI Machine Learning repository .",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 71,
+                            "text": "UCI Machine Learning repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, within the queries that contain spelling errors in the MSN dataset, there are about 40.6% of them contain more than 1 character errors.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 75,
+                            "text": "MSN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Intelligent surfer exhibits quite poor performance on GOV dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 65,
+                            "text": "GOV dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The hyper-parameter \u03bb s is set to 10 \u22124 for networks (except for DenseNet-89 with 5 \u00d7 10 \u22125 ) on CIFAR-10/100 dataset and 10 \u22125",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 117,
+                            "text": "CIFAR-10/100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Performance on the GLUE diagnostic entailment dataset, at 0.42 R 3 , falls far below the average human performance of 0.80 R 3 reported in the original GLUE publication, with models performing near, or even below, chance on some linguistic phenomena ( , Appendix B).",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 53,
+                            "text": "GLUE diagnostic entailment dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the newly created data set has very similar data characteristics to the original data set, then the condensed data set is a good substitute for most data mining algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, considering mT9P scores, the difference in percentage between ssRB (for k = 1) and TSVM on the Ohsumed collection is 4% which represents 58.8% of the interval length between the worse and best mT9P performance [33. .",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 126,
+                            "text": "Ohsumed collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In previous literature, some representative largescale datasets were built upon existing structures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the ABC metric with a trained calibrated scaling model and also evaluate it on out-ofdistribution FashionMNIST and notMNIST datasets in .",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 143,
+                            "text": "FashionMNIST and notMNIST datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our second dataset comprises of peer grading activity on a subset of questions of the HCI course offered on Coursera.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can also conduct an in-depth study on the Cross Term's distribution in documents collections, and examine which of the kernel functions fits the best to the actual distribution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The validation set is helpful to approximate the real distribution of both old and new classes in the feature space, allowing us to estimate the bias in FC layer.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, the learned linear combination does not consistently outperform either the uniform combination of base kernels or simply the best single base kernel (see, for example, UCI dataset experiments in , see also NIPS 2008 workshop).",
+                    "annotation_spans": [
+                        {
+                            "start": 181,
+                            "end": 192,
+                            "text": "UCI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We will first discuss the simple case in which an entire data set is available for application of the privacy preserving approach.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this series of tests, each collection is used in turn as training data while the other is used for testing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the hard negative videos are generated from the validation set, this experiment is performed on THU-MOS'14 testing set.",
+                    "annotation_spans": [
+                        {
+                            "start": 102,
+                            "end": 124,
+                            "text": "THU-MOS'14 testing set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Further investigation has shown that 20 primitives learnt from 12 character types are sufficiently generalised to represent all remaining novel character types without further learning, by using a single E-step to fit the pre-learnt parameters to a novel dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We chose 80 different classification data sets from the UCI repository.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 70,
+                            "text": "UCI repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and  show the results on the Letor TD2003 and TD2004 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 61,
+                            "text": "Letor TD2003 and TD2004 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental results on the SLAM benchmark dataset EuRoC demonstrated that the proposed method outperformed state-of-theart SLAM methods, such as DSO, ORB-SLAM, and LSD-SLAM, both in terms of accuracy and robustness in trajectory estimation.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 54,
+                            "text": "SLAM benchmark dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For datasets with a ground truth, we also report the clustering accuracy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence, the optimum number of clusters in a dataset may be determined by minimizing Q(P) with respect to k.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As mentioned above, we need to annotate the BOO, DVD, and MIX datasets with respect to Subjectivity and POA with respect to Policy.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 70,
+                            "text": "BOO, DVD, and MIX datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method on two datasets: the Lytro dataset of Kalantari et al. , and our own Spaces dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 57,
+                            "text": "Lytro dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 106,
+                            "text": "Spaces dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Privately Aggregating Teacher Ensembles: Several works have considered the following setting: An ensemble of teacher classifiers, each trained on private data sources, noisily predict labels on an unlabelled public dataset that is further used to train a student model ].",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, the top clusters found on the Movie dataset with \u03b3 =",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 56,
+                            "text": "Movie dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ReQ-ReC framework assumes neither ownership nor full access to the document collection, but instead relies on a standard search service to retrieve documents from the index.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "I is the inverse frequency of inflection points (approximately 5.7 in our dataset).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Single Image 2D Human Pose Estimation: Over the last few years, great progress has been made in detecting 2D human body keypoints from a single image  by leveraging large-scale manually annotated datasets  with deep Convolutional Neural Network (CNN) framework.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, identifying a social network data set with visual links and studying the feasibility of GHF-ART for effective fusion of visual links will be an interesting extension work.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We had one paid annotator judge 60 topics in the TDT3 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 60,
+                            "text": "TDT3 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides the structured classification tasks, we also verify the efficacy of ASM on structured regression tasks, such as monocular depth estimation and surface normal prediction, with U-Net  on Stanford 2D-3D-S dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 193,
+                            "end": 217,
+                            "text": "Stanford 2D-3D-S dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The sarcos dataset involves predicting the joint position of a robot arm; following convention we report results on the 1st, 5th and 7th joint positions.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "sarcos dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We explored the problem of mining co-evolving spatial event sets from a spatio-temporal dataset and proposed a novel algorithm to discover co-evolving co-located event sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The comprehensive experiments verify that our methods (1) improve an accuracy of average NDCG200 by \u223c30% over SimRank on various real networks, and (2) are \u223c10x faster than the state-of-the-art competitors on large datasets with 65.8M links for 1000 queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on the challenging KITTI dataset show that our method outperforms the state-of-the-art stereo-based method by around 30% AP on both 3D detection and 3D localization tasks.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 44,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We empirically evaluate our method on more complex random graph models and on the Netflix Challenge dataset demonstrating the effectiveness of our method over those of ].",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 107,
+                            "text": "Netflix Challenge dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A set of experiments has been conducted with two traditional TREC datasets and a new dataset based on the ClueWeb.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 74,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 106,
+                            "end": 113,
+                            "text": "ClueWeb",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As mentioned in Section 5.1.3, the comparison between FM and TFMAP is conducted on the Food dataset, due to the applicability of FM.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 99,
+                            "text": "Food dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results:  compares the performance of ESPNetv2 with state-of-the-methods on both the Cityscapes and the PASCAL VOC 2012 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 127,
+                            "text": "Cityscapes and the PASCAL VOC 2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is possible that higher performance from DA can be obtained by some dataset-specific tuning of hyper-parameters on a validation set of labeled target-domain data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This also holds for other datasets, given that the relative measures can only find partitions as good as the ones generated by the clustering algorithms, which explains the low ARI in some cases.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is easy to do when each document has a single timestamp or a fixed lifespan t, as in the case of the NYT collection where every article was assumed to be valid for 90 days.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 120,
+                            "text": "NYT collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For CNNs based on the Pascal VOC Part dataset , the study of  merged tens of small parts into several major landmark parts for the six animal categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 45,
+                            "text": "Pascal VOC Part dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method achieves state-of-the-art classification results on the CIFAR-100 image data set and the MIR Flickr image-text data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 67,
+                            "end": 91,
+                            "text": "CIFAR-100 image data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 100,
+                            "end": 130,
+                            "text": "MIR Flickr image-text data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SMO- For each dataset and solver, we run several trials with different number of iterations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To that end, we run bHUF-2 for US dataset with different values of K 2 ranging from 10 to 200, and show the boxplots of P@10",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 41,
+                            "text": "US dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated EASal on two publicly available datasets rich in emotion-evoking objects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the above, (a) is generated on datasets with independent predictors and (b) is generated on datasets with correlated predictors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used CLEF-IP 2010 collection which consists of 2.6 million patent documents.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 31,
+                            "text": "CLEF-IP 2010 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "erefore, we have to use two di erent datasets: the rst for the user dynamic derivation and the second for the LtR analysis.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "another large-scale dataset called Crowd Instancelevel Human Parsing (CIHP) dataset, which has 38,280 diverse human images.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 83,
+                            "text": "Crowd Instancelevel Human Parsing (CIHP) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For reference, we also considered an idealized setting, applying GLASSO to a similar dataset without the confounding effects (Ideal GLASSO), obtained by setting X = 0",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the projected dataset obtained by merging all the projected datasets",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The UvT Expert collection used in the experiments in this paper fits the scenario outlined in Section 3.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "UvT Expert collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This condition is satisfied in subset selection scenarios, where the dataset is large, the number of selected samples is a lot less than the number of samples (K \u226a M ), and we have freedom over the dimension of the samples/features (N ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given an arbitrary input image (adversarial or clean), we can obtain its latent features from the DAE and find the k-nearest neighbors in the training image dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The overall performance for this dataset is lower than that of the Forum dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 67,
+                            "end": 80,
+                            "text": "Forum dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Words were disambiguated and the retrieval effectiveness of an IR system applied to those collections was compared to the effectiveness of the system searching on the collection without disambiguation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the same network trained with only GoPro dataset and that trained with GoPro and our dataset together.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 59,
+                            "text": "GoPro dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 82,
+                            "end": 103,
+                            "text": "GoPro and our dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also test on the challenging USPS digits database (  ) on this dataset with the same homogeneous, degree 3 polynomial kernel, and identical preprocessing (normalization and centering), allowing us to compare pure classification performace.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 52,
+                            "text": "USPS digits database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we examined the clustering results for the \"Animal\" dataset and the \"Enron\" dataset qualitatively.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 72,
+                            "text": "\"Animal\" dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 81,
+                            "end": 96,
+                            "text": "\"Enron\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike traditional learning to rank models that depend on handcrafted features, neural representation learning models learn higher level features for the ranking task by training on large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, based on commonly used base networks, our proposed SymNets achieve the new state of the art on benchmark domain adaptation datasets of Office-31 , ImageCLEF-DA , and Office-Home .",
+                    "annotation_spans": [
+                        {
+                            "start": 150,
+                            "end": 159,
+                            "text": "Office-31",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 162,
+                            "end": 174,
+                            "text": "ImageCLEF-DA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 181,
+                            "end": 192,
+                            "text": "Office-Home",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given the OE dataset, we train our model with all the data except for the ones collected from Internet dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 20,
+                            "text": "OE dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 94,
+                            "end": 110,
+                            "text": "Internet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that conventional CF benchmark datasets, e.g., Netflix dataset, are not enriched with contextual information.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 44,
+                            "text": "CF benchmark datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 52,
+                            "end": 67,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As in the Newsgroups data set, SED significantly outperforms Convex TED (the p-value of paired t-test is 2.26 \u00d7 10 \u22125 ), validating the effectiveness of label information.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 29,
+                            "text": "Newsgroups data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we randomly selected a training set of 40 images, and a test set of 5 different images from the MIRFLICKER-1M dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 122,
+                            "text": "MIRFLICKER-1M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on two challenging benchmark datasets show that our proposed method outperforms the state-of-the-art methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Clearly, our multilevel scheme can effectively approximate the solution of the original problem with much shorter time as long as the size labeled data set is not too small.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Noun phrase chunks are extracted using a syntactic shallow parser CRFChunker 4 trained on WSJ corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 100,
+                            "text": "WSJ corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These datasets have greatly promoted the research of human part segmentation, and considerable progress has been made.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As Prior Networks parameterize the Dirichlet distribution, ideally we would like to have a dataset are the parameters of a target Dirichlet distribution p(\u21e1| ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the DBLP Computer Science Bibliography Database, which consists of 871,004 authors and 2,365,362 papers with time provided (from 1970 to 2009).",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 54,
+                            "text": "DBLP Computer Science Bibliography Database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Other symbols (e.g., JJ, RB, IN, CC, VP) are part-of-speech tags in Penn Treebank format.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consider a supervised learning problem on a dataset = {(x , y )} =1 of observation-label pairs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "in  in combination with their KNN-SVM method to give the best previously published results on the Caltech 101 image recognition benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 137,
+                            "text": "Caltech 101 image recognition benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An illustrative example of co-evolving spatial events (a) A spatio-temporal dataset (b) Spatial prevalence time sequences and a specified query sequence a novel algorithm to efficiently mine co-evolving spatial event sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To realize this null hypothesis, in the permuted dataset, opinion words for a particular feature are randomly swapped in each user's review over a subset of words that have been used to describe this feature by all users.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform intensive study of the noise statistics of the FMD dataset and show that the noise is indeed Poissondominated for two-photon and confocal microscopy, and has larger Gaussian component for wide-field microscopy.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 69,
+                            "text": "FMD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, using d = 10 in the AT&T dataset, kNN gives a 10.9% testing error rate when used on the PCA features, and a 9.7% testing error rate when applied to the nonlinear features computed by KPCA.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 45,
+                            "text": "AT&T dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One interesting observation is that the efficiency of the technique was better for the higher dimensional data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The statistics of these collections are reported in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, though there are more samples in the OU-LP-Bag \u03b2 dataset, the final results regarding the rank-1 accuracy on both datasets are at the same level.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 75,
+                            "text": "OU-LP-Bag \u03b2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the Peterson-Barney vowel dataset we show that the algorithm performs well in finding appropriate placement for the codebook vectors particularly when the confuseable classes are different for the two modalities.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 39,
+                            "text": "Peterson-Barney vowel dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It also displays the best correlation with ARI for all datasets (along with CDbw in dataset 4).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One is the TREC GOV collection, which is a 1.25 million page crawl of the .gov domain in the year 2002.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 30,
+                            "text": "TREC GOV collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "summarizes the comparison among some publicly relevant instructional datasets and our proposed COIN.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the CASIA NIR-VIS 2.0 database, we take Fr\u00e9chet Inception Distance (FID)  to measure the Fr\u00e9chet distance of two distributions in the feature space, reflecting the distribution consistency.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 33,
+                            "text": "CASIA NIR-VIS 2.0 database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Sometimes, there are no outliers in RAND training sets and these training sets are invalid for training.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, in the Appazaar dataset we have over 7000 items, which means that a sampling size of 200 could save over 50% of the buffer construction time, as illustrated in .",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 36,
+                            "text": "Appazaar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The UT Multiview dataset contains 160 gaze samples of 50 subjects recorded under controlled laboratory settings and 3D reconstructions of eye regions are used to generate synthetic images for arbitrary head poses.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 24,
+                            "text": "UT Multiview dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our zero-shot method takes as input the K attribute signatures and a dataset of images labeled with attributes, and produces a classifier for each unseen class as output.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A concept hierarchy created from a document collection can be used for query recommendation on Intranets by ranking terms according to the strength of their links to the query within the hierarchy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They show that infAP, statAP and depth-k pooling are better than hedge and the LETOR method (depth-k pooling using BM25) for building efficient and effective LtR collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 158,
+                            "end": 173,
+                            "text": "LtR collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this work, we evaluate our proposed network ternarization method for object classification task with CIFAR-10 and ImageNet datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 134,
+                            "text": "CIFAR-10 and ImageNet datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test our algorithm on the task of smile detection using a subset of 1, 000 images from the GENKI dataset (which is a collection of 60, 000 faces from the web).",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 107,
+                            "text": "GENKI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An optimization problem is devised to maximize the likelihood that the selected subgraphs are significant in the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This essentially induces a clustering effect in the document collection, allowing for better compression of docIDs and frequencies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From top to bottom: dance-twirl from the DAVIS16 dataset , horses05 from the FBMS dataset , and bird0014 from the Youtube-Objects dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 56,
+                            "text": "DAVIS16 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 77,
+                            "end": 89,
+                            "text": "FBMS dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 137,
+                            "text": "Youtube-Objects dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the multicluster dataset of (c), dip-means successfully discovers all clusters, in contrast to the other methods that significantly overestimate.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, Silhouette even provides the best results for one dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The expert finding qrels for the two years differ: in 2005, 50 working group titles were the test topics for the expert finding task, resulting in 1509 expert-group pairs, with 2 to 391 experts in the same group and approximately 30 experts per group on average; names and e-mail addresses of 1092 expert candidates (W3C members) are given as part of the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using our proposed autoregressive framework leads to new state-of-theart performance on the reasonable and occlusion settings of the Caltech pedestrian dataset, and achieves competitive state-of-the-art performance on the KITTI dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 133,
+                            "end": 159,
+                            "text": "Caltech pedestrian dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 222,
+                            "end": 235,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train the model using all videos from the training set, and test it on the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on three data sets show that our method provides substantially better performance than the baseline methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "thus the document collection can be separated into k parts: {D 1 , . .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Bearing these results in mind, we argue that learning-based methods and statistical methods should be compared by considering their corresponding advantages and limitations in both single-dataset and cross-dataset scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is a task based on real-valued sensor signals, with the data extracted from the Bao04 activity recognition dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 119,
+                            "text": "Bao04 activity recognition dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our study shows that, for the MovieLens dataset with 1 million ratings, the recommendation RMSE (root mean square error) increases from 0.8645 to 0.9 when the co-clustering setting varies from 1 \u00d7 1 to 5 \u00d7 5.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 47,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Building on earlier work in [32], we propose a framework for indexing and querying in versioned document collections that integrates non-positional and positional indexes to enable fast top-k query processing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct self-comparisons on STB dataset  by fine-tuning the networks pretrained on our synthetic dataset in a weakly-supervised manner, as described in Section 4.4.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 42,
+                            "text": "STB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "successfully extends it toward unsupervised image translation  by proposing the cycle consistency loss, which allows the model to learn the distinctive semantic difference between the collections of two image domains and translate the corresponding style without a direct pair-wise supervision.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Road Network and Probe-Car Datasets..",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 39,
+                            "text": "Road Network and Probe-Car Datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Vine dataset is used in .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "Vine dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training set consists of 60,159 different outfits from the collections 2015 \u2212 2017, and the validation and test sets have 2,683 and 3,104 outfits respectively, from the 2014 collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The test set contained two novel views of the eight constellations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Length normalization (Norm) is effective and improves the accuracy of tweet segmentation in the two collections for SCP-based stickiness.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One DeCAF7 feature is a 4096-dimensional vector taking 16 KB on the disk; the whole 20M dataset has thus some 320 GB of uncompressed data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We pick 10 subjects from MPIIGaze dataset and the same number of eye images were generated from GAN  for training a ResNet model.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 41,
+                            "text": "MPIIGaze dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated BoW-M and BoW-S on NLP tasks experimenting with all the 12 datasets used by Dredze et al",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Pairwise comparison labels are more informative and less variable than class labels, but generating them poses a challenge: their number grows quadratically in the dataset size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The YouTube dataset is crawled on 9th August 2012, during the London Olympic Games.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "YouTube dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For CIFAR10 and SVHN, we resize the 32 \u00d7 32 \u00d7 3 images to 64 \u00d7 64 \u00d7 3 and for other datasets, original image sizes are used throughout our experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 11,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 16,
+                            "end": 20,
+                            "text": "SVHN",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Two Synthetic Datasets: The proposed MPGP is firstly evaluated on two simulated onedimensional datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For a larger in-memory collection, we used the TREC WT10G collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 68,
+                            "text": "TREC WT10G collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "en, select the pooling strategy used to build the test collection, select the appropriate parameters, and execute the pooling strategy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, Coco-Stuff dataset  contains 10,000 complex images from COCO with dense annotations of 91 thing (e.g. book, clock, vase) and 91 stuff classes (e.g. flower, wood, clouds), where 9,000 images are for training and 1,000 for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 32,
+                            "text": "Coco-Stuff dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results on the dataset with selection bias and the reference one without selection bias.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides the real datasets, which are multidimensional, we also employ four 2D synthetic datasets, with different numbers of objects, clusters and noise, as depicted in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While some of the above baselines (SpOpt-\u2206, DocRebuild and CTSUM) are presumed to be unsupervised, they still tuned few parameters using the DUC 2005 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 141,
+                            "end": 157,
+                            "text": "DUC 2005 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct our experiments on CIFAR-10 dataset with three classic deep networks: VGG, DenseNet and ResNets.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 46,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We studied 30 patients with probable AD (age\u00b1 standard-deviation (SD) = 74\u00b14, range = 60-80 years, mini-mental score (MMS) = 23\u00b12) and 30 elderly controls (age\u00b1 SD = 73\u00b14, range = 60-80, MMS = 29\u00b11) which were selected from the ADNI database according to the following criteria.",
+                    "annotation_spans": [
+                        {
+                            "start": 228,
+                            "end": 241,
+                            "text": "ADNI database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The only exception is the TD2004 dataset for clicks generated using UBM.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 40,
+                            "text": "TD2004 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then observe that the weight \u03c9(x, y) in the graph corresponds to the overall strength of co-occurrences between x and y in the document collection, meaning that it represents a kind of term frequency (tf ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We split the sequences into 70% train, 10% validation, 20% test similar to our split of the original KITTI dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 114,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Do they coincide with the factors controlled by the dataset collectors?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This relevance score is used to rank the documents in the retrieval corpus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments conducted on three large-scale datasets show that our approach not only reduces the training time, but also leads to significant performance gains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Based on extensive experiments, we demonstrate that the proposed method is effective to improve the accuracy of WSOL, achieving a new state-of-the-art localization accuracy in CUB-200-2011 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 176,
+                            "end": 196,
+                            "text": "CUB-200-2011 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared with KCCA, which is the second best method, the    as a bag-of-words as with the experiments using the Wikipedia dataset, and the tag list is represented as a set of tags.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 129,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is important to emphasize that to increase the applicability of the proposed model, we do not require that the user needs to know a priori whether the source datasets are related to the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "but the distance from the boundary provides a monotonic ranking of the entire test set which is suitable for this semantic retrieval task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a face dataset, we extract the feature for each face image with a trained CNN, forming a set of features To construct the affinity graph, we regard each sample as a vertex and use cosine similarity to find K nearest neighbors for each sample.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform experiments on varied MLN structures (Alchemy benchmarks ) with arbitrary evidence to illustrate the generality of our approach.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 67,
+                            "text": "Alchemy benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The corresponding data sets are denoted by Syn(dim).D500K.P(3) and Syn(dim).D500K.P(9) respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second corpus is the 20-Newsgroups dataset collected by Lang  with about 20,000 documents which are postings on 20 Usenet newsgroups.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Chen et al. , inspired by , introduce the Depth in the Wild dataset and train a CNN using ordinal relations between point pairs.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 67,
+                            "text": "Depth in the Wild dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The non-parametric DL models achieve comparable : Illustration of RDMM registration results on the OAI dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 110,
+                            "text": "OAI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We divided each data set into 1500 blocks of 10 symbols each and called a block session (same as a sequence).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the strength of the correlation differs over data sets, and the amount of training data is not the only characteristics that will influence the final performance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The model takes as input a video, the audio track of the video, a dialog history (which comprises the groundtruth script from the Charades dataset and the first t\u22121 QA pairs of the dialog), and a follow-up question (the tth question in the dialog).",
+                    "annotation_spans": [
+                        {
+                            "start": 130,
+                            "end": 146,
+                            "text": "Charades dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, LinUCB-SIN is a competitive baseline when, as in the Last.fm dataset, there are few very popular items.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 87,
+                            "text": "Last.fm dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in Epinions datasets, L1-MAX and L2-MAX perform better than the other algorithms.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 29,
+                            "text": "Epinions datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The document collection is the W3C corpus (appr.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 41,
+                            "text": "W3C corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To show the generalization capabilities of VOCA, we select, align and pose-normalize multiple neutral scans from the BU-3DFE database , with large shape variations.",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 133,
+                            "text": "BU-3DFE database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Training and recognition experiments were conducted using the speaker-independent, continuous-speech, DARPA Resource Management database.",
+                    "annotation_spans": [
+                        {
+                            "start": 102,
+                            "end": 136,
+                            "text": "DARPA Resource Management database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At this point, we would like to point out the storage requirement of the UniGrid structure with respect to the original data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Because email is a temporally ordered collection, we used a split by time (rather than randomly) to ensure that we do not use future information to predict past reply behavior.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In  we report for the MAX-CUT instances the average graph size reduction on each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Real life problems in face recognition also show minor variations in pose, which are addressed by testing the kernels on images in the ORL dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 146,
+                            "text": "ORL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, this data set had a high level of correlation between locality and classification accuracy, since the labels were defined by cluster behavior.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this study, we present a Chinese Web collection, SogouT-16, which is the largest free-of-charge public Chinese Web collection so far.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 61,
+                            "text": "SogouT-16",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to existing models that are trained on FlyingThings3D  or Cityscapes , the models pretrained on our DrivingStereo outperform the others on our test set and KITTI Stereo dataset , which demonstrates the capacity of our dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 62,
+                            "text": "FlyingThings3D",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 67,
+                            "end": 77,
+                            "text": "Cityscapes",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 165,
+                            "end": 185,
+                            "text": "KITTI Stereo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the left side, we report detection accuracy at low FPPI rate for the ETH dataset, averaged over 5 random splits of training/test images as in .",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 83,
+                            "text": "ETH dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the genre labels for the Million Song Dataset are provided.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 54,
+                            "text": "Million Song Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also note that, except on the YahooVideo dataset, RankSVM's training time exhibits a much higher variability across different folds than GP.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 51,
+                            "text": "YahooVideo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We validate AS-GCN in action recognition using two skeleton data sets, NTU-RGB+D and Kinetics.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 80,
+                            "text": "NTU-RGB+D",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 85,
+                            "end": 93,
+                            "text": "Kinetics",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We provided participants with five datasets from different application domains and called for classification results using a minimal number of features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each data set, we first randomly choose one class, and regard it as target class and treat the other categories as the non-target class.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(The density for AP and WSJ collections is not shown but has the same pattern as the ROBUST collection.)",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 39,
+                            "text": "AP and WSJ collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 85,
+                            "end": 102,
+                            "text": "ROBUST collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We find that equal weighting of both terms (L R = 0.5) improves both physical measures without negatively affecting the reconstruction metrics on both the synthetic and the real datasets, as is shown in  (last row).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our network produces convincing results on the FLIC dataset (with low joint position error), however, because our simple Spatial-Model is less effective for a number of the highly articulated poses in the LSP dataset, our detector results in incorrect joint predictions for some images.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 59,
+                            "text": "FLIC dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 205,
+                            "end": 216,
+                            "text": "LSP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the dataset owned by each node becomes main memory resident in going from 2 to 4 nodes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All experimental con gurations appear to bene t from the inclusion of the 21 extra features, with two exceptions: runs using GoogleNews embeddings on the TREC QA benchmark, and one of the dropout runs on the WikiQA data.",
+                    "annotation_spans": [
+                        {
+                            "start": 154,
+                            "end": 171,
+                            "text": "TREC QA benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method is evaluated on DAVIS and YouTube-VOS datasets qualitatively and quantitatively, achieving the state-of-the-art performance in terms of inpainting quality and speed.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 57,
+                            "text": "DAVIS and YouTube-VOS datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, one may have a collection of images taken on a holiday trip, and want to summarize and arrange this collection to send to a friend or family member or to post on Facebook.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ten datasets were chosen to avoid any biased results and to represent the extremes along the following dimensions, stationary/nonstationary, noisy/smooth, cyclical/non-cyclical, symmetric/asymmetric, etc.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "erefore, as in , we consider the following types of datasets: (1) Source collection (O), (2) Rn-collection (Rn), (3) R-collection (R).",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 129,
+                            "text": "R-collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 66,
+                            "end": 83,
+                            "text": "Source collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 93,
+                            "end": 106,
+                            "text": "Rn-collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "State-of-the-art performance on standard UDA benchmarks, such as office-31 and digits, traffic signs adaptation tasks, with our feature-level DA method.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 55,
+                            "text": "UDA benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Incorporating more actions might improve the coverage, however, the aim of this paper is not to explore dataset-specific grammar, but to show that a flexible grammar works well and dialog memory helps.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, the trained model though having size 100 times more than the training file, fails to detect them in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The last columns in  report point and 95% interval estimates of the stability of the 43 TREC collections we considered.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This results in a total of 2772 meshes which are split among the training, validation and test sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MCBoost Input: A data set (x i , y i ) and a set of pre-defined weak-learners Output: Multiple boosting classifiers H k (x) =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Large scale simulations and experiments on UCI datasets and high-dimensional gene expression datasets showed that label propagation based on global linear neighborhoods captures the global cluster structures better and achieved more accurate classification results.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 55,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These preliminary findings motivated the present study, which greatly extends our prior work by proposing new metrics of relevance and several new recommendation strategies (including L2R based strategies), evaluating them on more recently collected and larger datasets, comparing them against more baselines, and reaching significantly better results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where \u03bb p = 8 in our experiment, and the perceptual loss is based on VGG16 pretrained on the ImageNet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 109,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We illustrate this generalization called \"Kronecker GLASSO\" on synthetic datasets and heterogeneous protein signaling and gene expression data, where the aim is to recover the hidden network structures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The parameters of our model are tuned on DUC 2005 dataset and set as follows: \u00b5 = 0.1, \u03bb = 0.09 and 6 iterations.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 57,
+                            "text": "DUC 2005 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the ClueWeb Web collection both the English part of Category A (ClueA) and the Category B subset (ClueB) were used.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 30,
+                            "text": "ClueWeb Web collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We follow  to train and evaluate puzzle recognition on the ILSVRC2012 dataset , a subset of the ImageNet database .",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 77,
+                            "text": "ILSVRC2012 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 113,
+                            "text": "ImageNet database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results presented in  show that on both the CIFAR-10 and CIFAR-100 datasets whitebox attacks successfully change the prediction of DNN and DNN-ADV models to the second most likely class and evade detection",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 79,
+                            "text": "CIFAR-10 and CIFAR-100 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is modeled by first assigning different weights to the source datasets according to their relatedness.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Such approaches were not evaluated under the CQA vertical search setting yet, whose query length distribution and query attributes is quite distinguishable from general Web search and from question/answer datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides being more efficient than generating the full Hessian matrix (which would be prohibitive for high-dimensional datasets), the finite difference approach has the benefit of measuring larger-scale variations of the gradient (where the scale is set using the parameter h) in the neighborhood of the datapoint, rather than an infinitesimal point-wise curvature.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset was provided for KDD Cup 2012 track 2[1] and involves session logs of soso.com, a search engine owned by Tencent.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation on the Extended Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 34,
+                            "text": "Extended Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The UvT Expert collection was extracted from a different organizational setting than the W3C collection and differs from it in a number of ways.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "UvT Expert collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 103,
+                            "text": "W3C collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Even though fine-tuning is not performed on the TD500 training set, CRAFT outperforms all other methods as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 66,
+                            "text": "TD500 training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation datasets Two standard datasets for text categorization are used as the evaluation test bed: the Reuters-21578 dataset and the WebKB dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 128,
+                            "text": "Reuters-21578 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 137,
+                            "end": 150,
+                            "text": "WebKB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The index is created and updated in real time so that any change to the document collection is reflected into the index in an online fashion.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the Yelp Challenge dataset downloaded on October 2015 that contained 1.6 million reviews: http://www.yelp. com/dataset challenge 2 http://snap.stanford.edu/data/web-Amazon-links.html users and items with less than 10 interactions.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 34,
+                            "text": "Yelp Challenge dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The resampled dataset proposed by Vasileva et al.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate this property on a real-data tag prediction problem, using the Yahoo Flickr Creative Commons dataset, outperforming a baseline that doesn't use the metric.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 115,
+                            "text": "Yahoo Flickr Creative Commons dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, the validation data is used to select the appropriate distribution and to decide whether the dependent variable y should be the observed time T or the logarithm of the observed time log(T ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on standard TREC collections and queries are shown in , where (I) refers to Indri and (A) refers to Anserini.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 36,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As current vision pipelines exploit large datasets consisting of natural images, learned models are largely biased towards them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show improved performance over baseline methods on several real world data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset collected most products and reviews released on Amazon.com in 2006.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection has already been used in our research work for detecting sentiment spikes and for detecting and ranking the likely triggers of the identified spikes .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train and evaluate the proposed model, we also create a new largescale dataset labeled with corresponding distortion parameters and well-annotated distorted lines.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then used the larger MovieLens 10M dataset (0-5 rating scale, 69878 users, 10677 movies) to compare the proposed approach to the existing IFCF methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 45,
+                            "text": "MovieLens 10M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the Tags data is gathered from the LabelMe 8  and Flickr 9 databases.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 77,
+                            "text": "LabelMe 8  and Flickr 9 databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we study the problem of organizing and compressing positional index structures for versioned collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(2) Composition-1k testing dataset in , which is to evaluate how our network performs on natural images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 34,
+                            "text": "Composition-1k testing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate our full object detection pipeline, we report results on the test set of the KITTI benchmark.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 104,
+                            "text": "KITTI benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments show that we can select comparisons from a dataset involving more than 10 8 comparison pairs,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "#(t, e)/#(e), where #(t, e) is the number of tables in the table corpus containing entity e in the core column and term t in the table caption, and #(e) is the total number of tables contaning e.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 71,
+                            "text": "table corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As it can be observed for both, our method and , the accuracy reported between 5 and 10-tasks of the MNIST benchmark has changed a little, suggesting that for this dataset and evaluation methodology both approaches have largely curbed the effects of catastrophic forgetting.",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 116,
+                            "text": "MNIST benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By the self-consistency of RTPs, this has the effect of marginalizing out MJP events involving cuts that do not further separate the predictors in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on Time Series Data Set.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 31,
+                            "text": "Time Series Data Set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We ran our experiments using the wt10g TREC web collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 58,
+                            "text": "wt10g TREC web collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The miniImageNet dataset  is a subset of 100 classes selected from the ILSVRC-12 dataset  with 600 images in each class.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 24,
+                            "text": "miniImageNet dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 71,
+                            "end": 88,
+                            "text": "ILSVRC-12 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We constructed symmetrized K-NN graphs from the multivariate data, where K = 5 for the 30 smallest datasets, text datasets, PROTEIN and SEISMIC datasets, while K = 10 for the remaining datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 152,
+                            "text": "PROTEIN and SEISMIC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "F mod&0.9 b 3 and F 0.9 b 3 average values of the two random algorithms are presented in  for different k values (from 2 to 20) and using the three SRC datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 148,
+                            "end": 160,
+                            "text": "SRC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "His current research interests include building test collections for social media environments and nontraditional retrieval tasks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1) Human image matting testing dataset, which is to measure the performance of our method on a specific task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform experimental evaluation of our methods on 4 datasets: one dataset, STL-10, for object recognition to benchmark our hierarchical model and three datasets for annotation: Natural Scenes, IAPRTC-12 and ESP-Game 2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 84,
+                            "text": "STL-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 180,
+                            "end": 194,
+                            "text": "Natural Scenes",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 196,
+                            "end": 205,
+                            "text": "IAPRTC-12",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 210,
+                            "end": 220,
+                            "text": "ESP-Game 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to test our framework on search diversity, we use the data from the TREC diversity task, namely, the ClueWeb collection category B, the topic data, and the relevance assessments from the TREC 2009 and 2010 editions .",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 139,
+                            "text": "ClueWeb collection category B",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 196,
+                            "end": 214,
+                            "text": "TREC 2009 and 2010",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, some further improvements over our heuristics (up to 12% in precision) can also be achieved with our L2R based strategies, although the best of the two techniques depends on the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct ablation study on VehicleID dataset to investigate the effeteness of each part branch in our model.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 46,
+                            "text": "VehicleID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We call the collection F = {K tn } of all base kernels the semantic kernel forest.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Reuters Corpus RCV1-V2 each document is labeled as belonging to one or more categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 29,
+                            "text": "Reuters Corpus RCV1-V2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on the TREC-5/6 English-Chinese test collection show this approach to be promising.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 59,
+                            "text": "TREC-5/6 English-Chinese test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As training CNNs on videos is computationally expensive, we used a subset of the Kinetics dataset  with 100k videos from 150 classes: Tiny-Kinetics.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 97,
+                            "text": "Kinetics dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 134,
+                            "end": 147,
+                            "text": "Tiny-Kinetics",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Words were taken from the TI-46 Word database, which contains 10 training and 16 testing tokens per word per talker and 16 talkers.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 45,
+                            "text": "TI-46 Word database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This data set is an example of the motivating application from Fingerhut in Section .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A natural question, therefore, is how this dataset compares to the o cial Tweets2013 collection",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 95,
+                            "text": "Tweets2013 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate KTN on multiple datasets and multiple source models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the data sets on relaxed dimensions, the MS-distance filtered off over 95% of data without lose of accuracy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The KBA corpus is essentially a time ordered document stream spanning from October 2011 to January 2013, containing over 1 billion documents crawled from the web.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 14,
+                            "text": "KBA corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We share our WebCrowd25k dataset 1 , including: (1) crowd judgments with rationales, and (2) taxonomy category labels for each judging disagreement analyzed.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 32,
+                            "text": "WebCrowd25k dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the proposed method on the challenging PASCAL VOC2012 dataset  and compare with previous methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 73,
+                            "text": "PASCAL VOC2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is perhaps the best performance by (trainable and hand-crafted) deep hierarchical models on the Caltech101 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 119,
+                            "text": "Caltech101 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the CPU time against the input dataset size .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When multiple test collections are to be compared, the user can repeat the process with a new instance of the application for each test collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our progressive networks are evaluated on three synthetic datasets, i.e., Rain100H , Rain100L  and Rain12 .",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 82,
+                            "text": "Rain100H",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 85,
+                            "end": 93,
+                            "text": "Rain100L",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 99,
+                            "end": 105,
+                            "text": "Rain12",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Establishing benchmark datasets for the evaluation of causal discovery algorithms will naturally accelerate the development of this research discipline and increase its real-world impact.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This can be categorized into (a) work on maintaining inverted indexes when faced with changes in the document collection and (b) approaches that make search aware of temporal information associated with documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This content, typically multimedia (e.g., images and videos), brings challenges to current multimedia Information Retrieval (IR) methods, not only due to the scale of object collections and upload rate, but also due to the (usually) poor quality of user-generated material .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The convolutional layers of our CNN subnet are pre-trained on the ImageNet 2012 dataset , while the other layers are trained from Comparative results for large-scale FSL on the ImNet dataset, which contains 1,000 source classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 87,
+                            "text": "ImageNet 2012 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 177,
+                            "end": 190,
+                            "text": "ImNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the ETH dataset alone, this threshold is lowered to 0.3 to enable a fair comparison with the related work .",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 18,
+                            "text": "ETH dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The tremendous difference in performance is mainly due to the difference between the nature of the two collections SGE_g and SIN_g.",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 120,
+                            "text": "SGE_g",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 125,
+                            "end": 130,
+                            "text": "SIN_g",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The full collection contains 938, 035 emails and 325, 506 a achments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recently, data-driven deep saliency models have achieved high performance and have outperformed classical saliency models, as demonstrated by results on datasets such as the MIT300 and SALICON.",
+                    "annotation_spans": [
+                        {
+                            "start": 174,
+                            "end": 180,
+                            "text": "MIT300",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 185,
+                            "end": 192,
+                            "text": "SALICON",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When pooling results across all numbers of topics \u226520, we find that S is significantly better than Base with p = 1.4 \u00d7 10 \u22124 and SW is better than W with p = 5 \u00d7 10 \u22125 on the ACL corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 175,
+                            "end": 185,
+                            "text": "ACL corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Without requiring a pre-mined candidate collection, Slim is parameter-free in both theory and practice.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, Authorship+Question Mark+5W1H Words+Length achieved similar or even better results than Sequential Pattern Mining on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When we train the deep networks on ImageNet1K dataset, the initial learning rates are set to 0.1 and divided by 0.1 at epoch .",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 53,
+                            "text": "ImageNet1K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact adjacent region edges connect at the region boundary by continuity, however their angle might differ, this angle defines the curviness of the decision boundary, which is defined as the collection of all the edges introduces by the last layer.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The baseline substantially outperforms BARACO only for the perfect sDBN user model on the TD2003 and TD2004 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 116,
+                            "text": "TD2003 and TD2004 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the ten TREC test collections used in our experiment.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 35,
+                            "text": "TREC test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset Image Segmentation contains planar graphs that are constructed  For the MAX-CUT problem we use two different types of instances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On benchmarks, RFNs are compared to other unsupervised methods like autoencoders, RBMs, factor analysis, ICA, and PCA.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We developed and release a new benchmark dataset for demographic prediction in retail business scenario which could be used for future research.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The query logs chosen to build these features were from previous to April 2012 to ensure a fair experimental setting with no overlap with the data collection period of the intrinsically diverse or regular sessions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A recurring challenge in most recent QA datasets has been that humanwritten answers contain unexpected but distinct biases that models can easily exploit.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 48,
+                            "text": "QA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also point out that the performance on this dataset has almost saturated, because the percentage is already above 90% even at the lowest threshold.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results on the smaller 10K subset were made visible throughout the challenge (\"public leaderboard\"), while results on the 100K Note that the MSD dataset has more than 20 times more items than both MovieLens and Netflix, and is over 100 times more sparse.",
+                    "annotation_spans": [
+                        {
+                            "start": 145,
+                            "end": 156,
+                            "text": "MSD dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 215,
+                            "end": 222,
+                            "text": "Netflix",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 201,
+                            "end": 210,
+                            "text": "MovieLens",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An even more apparent pattern is observed on DBLP3 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 58,
+                            "text": "DBLP3 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "achieve an average recall of 92.8% and 94.7%, respectively, which is close to solving the 3DMatch data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 106,
+                            "text": "3DMatch data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the proposed approach with LSTM, the time-aware RNN model (TimeJoint)  and recurrent marked temporal point process model (RMTPP )  on the Stack Overflow dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 149,
+                            "end": 171,
+                            "text": "Stack Overflow dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Incorporating Temporal Effects: Social streams are temporally sensitive in nature.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the 3 implicit feedback datasets described in Section 5.1.2.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, accessories and clothes are in many cases possessed by people in the images of the OID dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 100,
+                            "text": "OID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Nam and Han  propose a multi-domain deep classifier combined with the hard negative mining, bounding box regression, and online sample collection modules for visual tracking.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The task is to predict movie ratings in the Internet Movie Database (IMDB 1 ), which has been used in movie recommendation .",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 67,
+                            "text": "Internet Movie Database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The number of neurons in the fully-connected layer fc6 and fc7 are all 4096, and in fc8 it is equal to the number of categories in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comprehensive results on single-illumination and multi-illumination color constancy datasets show that GI outperforms the state-of-theart learning-free methods and achieves state-of-the-art in the cross-dataset setting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first one is the pcmac data set, which is a subset of the 20-newsgroup data set from , while the second one is the RCV1 data set, which is a filtered collection of the Reuters RCV1 from .",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 83,
+                            "text": "20-newsgroup data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 132,
+                            "text": "RCV1 data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 21,
+                            "end": 35,
+                            "text": "pcmac data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 172,
+                            "end": 184,
+                            "text": "Reuters RCV1",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "achieves a 3.5-times speedup on the WikiVote dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 52,
+                            "text": "WikiVote dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is a unigram language model specified by the following formula with standard Dirichlet smoothing : where N d is the length of document d from the target collection DC, and \u00b5 is the parameter of Dirichlet smoothing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 3 is devoted to a detailed description of the RAVEN dataset generation process, with Section 4 benchmarking human performance and comparing RAVEN with a previous RPM dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 67,
+                            "text": "RAVEN dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 148,
+                            "end": 153,
+                            "text": "RAVEN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 170,
+                            "end": 181,
+                            "text": "RPM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "face images of four expression from the AR data set; Second row: the markers identified by tree structured group Lasso; Third row: the markers identified by Lasso.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 51,
+                            "text": "AR data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then applied our incremental imputative SVD to the raw training dataset and found a 5-dimensional subspace that had even better prediction accuracy of 0.7910 MAE, 1.0811 SD (see ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Corel image collection: 528 images distributed over 30 categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 22,
+                            "text": "Corel image collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The common object categories in natural scenes can vary from fruits to animals and the counting must be performed in both indoor and outdoor scenes (e.g. COCO or PASCAL VOC datasets).",
+                    "annotation_spans": [
+                        {
+                            "start": 154,
+                            "end": 181,
+                            "text": "COCO or PASCAL VOC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used speech from the TIMIT acoustic-phonetic continuous speech corpus , a dataset of utterances spoken by 630 native American English speakers.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 72,
+                            "text": "TIMIT acoustic-phonetic continuous speech corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset is much smaller and more balanced as compared all other datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the Wikipedia collection contains structured text, we can make use of the given paragraph segmentation and retrieve and score XML <P> elements as well.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 30,
+                            "text": "Wikipedia collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For PSP-Net, we use 'poly' learning rate schedule as in  and train 220K, 18K and 76K on COCO, Cityscapes and our dataset with minibatch size 16.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 92,
+                            "text": "COCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 94,
+                            "end": 104,
+                            "text": "Cityscapes",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset contains 60 challenging sequences (replacing some simple sequences with more difficult ones in VOT2016) and has more accurate groundtruth.",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 110,
+                            "text": "VOT2016",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By considering a much larger search space, they improve over Krimp at expense of efficiency-for beam-width 1 it coincides with Kramp-and hence only small datasets are considered.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Enron collection was our attempt to provide a representative collection, as it had previously been used in the TREC 2009 Legal track , that would more accurately indicate the effectiveness of participant systems.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "Enron collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 115,
+                            "end": 136,
+                            "text": "TREC 2009 Legal track",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to reach a holistic measure of quality, especially when evaluating the quality of reconstruction of real datasets without ground truth, the Blind/Referenceless Image Spatial Quality Evaluator (BRISQUE) , which utilizes normalized luminance coefficients to quantify the naturalness in images, is applied.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we propose learning algorithms that are data-driven; specifically, we will rely on inter-event times between event labels in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Analysing these results in more detail, we see that, unsurprisingly, the largest improvements are made on the large and/or dense datasets, such as Accidents, Connect-4, and Ionosphere.",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 156,
+                            "text": "Accidents",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 158,
+                            "end": 167,
+                            "text": "Connect-4",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 173,
+                            "end": 183,
+                            "text": "Ionosphere",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the Wikipedia collection, for the three versions of the core compared (w = {1, 2, All}, No-EOL), we applied reranking to the top-100 hits, and then collected the top-10 hits returned by each query for rating.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 30,
+                            "text": "Wikipedia collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Other conclusions also emerged from the research: learning-based methods generally perform worse in the cross-dataset setting; When testing on an image with color checker masked by zeros, learning-based methods can still exploit the location of the color checker and overfit to scene and camera specific features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As for the labeled document classification data sets, they were extracted from sections of the Reuters RCV1/RCV2 corpora, again for the 3 pairs considered in our experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 120,
+                            "text": "Reuters RCV1/RCV2 corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The index proposed by Dunn  is based on a partitioning P of the dataset D into k clusters C i , i = 1, 2, . . . , k, a distance measure (intercluster distance)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We identified near duplicates in the AQUAINT collection with a variant of Broder's duplicate detection algorithm , and only considered documents with the highest possible similarity to be duplicates of each other, i.e. all shared shingles are identical.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 55,
+                            "text": "AQUAINT collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that Mnas-Net  was searched directly on ImageNet dataset and need to validate time latency during searching, which is a very resource-exhausted process due to the high training cost on such a large scale dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 61,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show improved performance in detecting the principal occluding and contact boundaries for the scene over previous methods on data gathered from the LabelMe database.",
+                    "annotation_spans": [
+                        {
+                            "start": 151,
+                            "end": 167,
+                            "text": "LabelMe database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recall that for the Davies-Bouldin index, small values are better than larger ones, so we are interested in those values of k for which the Davies-Bouldin index values computed from the original dataset lie below the smallest of the permutation values.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "DE involves expanding each document in the collection by adding terms from a set of topically related documents, either from the search collection or elsewhere.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, we train a language model on the sentence corpus using the adversarial text generation method , which generates a sentence conditioned on a given image feature.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the clustering quality is comparable across the datasets, ISM clearly produces the lowest cost with the fastest execution time.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, due to the space constraints, we focus our attention on the resulting parameterization for the Robust04 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 131,
+                            "text": "Robust04 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This suggests that for a larger collection like the WT10g, risk adjustment is even more favorable than for smaller collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, we add the MPIIGaze dataset to the training data and apply leave-one-person-out gaze estimation for real-video dataset, it improves the gaze estimation accuracy by more than 1 \u2022 which partly supports the domain shift problem between UT Multiview and MPIIGaze.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 33,
+                            "text": "MPIIGaze dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A smaller topic Dirichlet smoothing parameter \u03b7 generally supports a larger number of topics, as shown in the left column of , and hence often leads to lower perplexities, as shown in the middle column of ; however, an \u03b7 that is as small as 0.001 (not commonly used in practice) may lead to more than a thousand topics and consequently overfit the corpus, which is particularly evident for the HDP-LDA on both the JACM and PsyReview corpora.",
+                    "annotation_spans": [
+                        {
+                            "start": 414,
+                            "end": 440,
+                            "text": "JACM and PsyReview corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed approach is evaluated on a synthetic dataset and 6 public benchmark datasets with comparison with representative baseline approaches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The runs are based on 221 INEX Ad hoc topics of 2006 and 2007 and the Wikipedia collection  consisting of over 650,000 documents and more than 52 million XML elements.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 90,
+                            "text": "Wikipedia collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, given a collection of Fermat pathlengths, our procedure will produce an oriented point cloud (locations and normals) for the NLOS surface X .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When fit to a collection of documents, the topic distributions often reflect the themes that permeate the document collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we are applying these results to biological datasets, starting with publically available datasets like those considered by Dudoit and Fridyland  and ultimately examining gene expression datasets that are currently being generated in our laboratory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, we note that unlike the Brightkite and Foursquare checkin datasets, the Yelp dataset consists of only user-venue ratings, and hence the sequential properties of visits to venues cannot be observed.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 72,
+                            "text": "Brightkite and Foursquare checkin datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 78,
+                            "end": 90,
+                            "text": "Yelp dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar to the comparison on Market-1501 dataset, our pyramid model exceeds PCB+RPP 9.8% and 5.7% at metrics mAP and rank 1, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 48,
+                            "text": "Market-1501 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the data is distributed by samples, each machine has a complete view of the problem (albeit a partial view of the dataset).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the MSLR dataset, we use insights regarding the maximum BM25 score from  and use the max of the BM25.whole.document feature to predict when the baseline (the ranking obtained from the same feature) will perform well.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 20,
+                            "text": "MSLR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When applied to the seed retrieval task, our model yields state-of-the-art results on the TREC 2007 Blog Distillation Task dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 130,
+                            "text": "TREC 2007 Blog Distillation Task dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, we train a model on the Chi- nese Signature Dataset and test the model on the BHSig-H Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 106,
+                            "text": "BHSig-H Dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 37,
+                            "end": 64,
+                            "text": "Chi- nese Signature Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The CHC dataset includes a vast number of microstructure images describing phase separation and was created by solving the following partial differential equation: \u2202u \u2202t =",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "CHC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the second part, we measured the strong scalability of our implementation on the SDSS dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 97,
+                            "text": "SDSS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We explore two aspects of user variability with regard to evaluating the relative performance of IR systems, assessing effectiveness in the context of a subset of topics from three TREC collections, with the embodied information needs categorized against three levels of increasing task complexity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This behavior is demonstrated on dataset 0095, which does contain a sufficiently large amount of lines in the building facades.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In terms of the compared methods, if the results of their methods on some datasets are not reported, we run the released  code with hyper-parameters mentioned in their papers, and the results are marked by (*) on top.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the two relative small data sets, i.e. the Tweet data and the Title data, the number of documents is only about twice of the number of distinct terms.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 56,
+                            "text": "Tweet data",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 65,
+                            "end": 75,
+                            "text": "Title data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset includes 685,678 documents with 17,297,548 versions (\u00b5 = 25.23 and \u03c3 = 28.38).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our SAIL-VOS dataset differs in that the scenes are from both indoor and outdoor settings and consequently : The dataset collection pipeline.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "SAIL-VOS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results for the Cora dataset, which also includes attributes, clearly magnify the benefits of flexible posterior as SI-VGRNN improves the accuracy by 2% compared to VGRNN.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 32,
+                            "text": "Cora dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For real datasets including absolute labels (ROP, Sushi, Netflix, Camra), we also repeat experiments 150 times, each time with a different randomly selected set A.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and id f (t) are estimated on the target collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "YouTube-VOS benchmark  shows the results obtained on YouTube-VOS validation set for the zero-shot VOS problem.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 21,
+                            "text": "YouTube-VOS benchmark",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 53,
+                            "end": 79,
+                            "text": "YouTube-VOS validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(right) demonstrates the substantial benefits of our approach on the banana dataset  over competing methods such as boosting and decision trees, both of which evidently overtrain.",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 83,
+                            "text": "banana dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "LETOR challenge dataset that has 3 splits",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 23,
+                            "text": "LETOR challenge dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We briefly introduce some details of the two data sets below.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This accuracy is tested by a set of experiments we perform on three datasets:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows that Parallel-HDP converges at essentially the same rate as standard HDP on the KOS data set, even though topics are generated at a slower rate.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 98,
+                            "text": "KOS data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": It is an instance based evaluation measure and therefore weighs higher those categories which have higher fraction in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We split each dataset into 5 folds and repeat the experiments 5 times using one fold as the test set and the remaining 4 folds as the training set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To facilitate future research based on our ApolloCar3D dataset, we also develop two 3D car understanding algorithms, to be used as new baselines in order to benchmark future contributed algorithms.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 62,
+                            "text": "ApolloCar3D dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present efficient heuristics for learning PGEMs from data, demonstrating their effectiveness on synthetic and real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Dependence on d.  shows performance over synthetic datasets as a function of dimension d.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A cleaner external corpus, such as the Wikipedia, has potentially less to contribute as a source of expansion terms for these query sets because the target corpus is already relatively clean.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 48,
+                            "text": "Wikipedia",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To compare our method with previous methods, we use the test video sequences from the above two datasets except for the first four frames and the last frame in each video, as  does not provide the results for them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In a real experiment, with a dataset of finite size, the optimal vector found by any of the R\u00e9nyi divergencesv will deviate from the true relevant dimension\u00ea 1 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\u2022 TNMF E substantially outperforms baseline methods on both data sets, especially in the Question data which has much more documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two terms in the condition should appear at least certain time together in the collection (10 in our case) and they should be related.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "During training time, we excluded the ETH3D and Sceneflow datasets and finetuned with Middlebury-v3, KITTI-12 and KITTI-15.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 66,
+                            "text": "ETH3D and Sceneflow datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 99,
+                            "text": "Middlebury-v3",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 101,
+                            "end": 109,
+                            "text": "KITTI-12",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 122,
+                            "text": "KITTI-15",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ICA, applied to this data set in which about 50% of the trials were contaminated by blinks, successfully isolated blink artifacts to a single component ( ) whose contributions could be removed from the EEG records by subtracting out the component projection .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test both equations 9 and 12 to estimate the rank-relevance model ( | ), using relevance judgments from the TREC dataset and AOL click statistics, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 111,
+                            "end": 123,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The pooling method consists in building a test collection by using the results provided by a set of search engines.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Among all the compared methods, up to the date of submission of this paper, FFCC  achieves the best overall performance  with the both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The size of the dataset after preprocessing was 1813 people by 1532 movies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The fourth dataset is the Wine dataset, which contains 178 samples from three types of wines.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 38,
+                            "text": "Wine dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Authors rolling their own test collections have a high bar to overcome because reviewers prefer test collections that emerge from large community evaluations like TREC.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, multi-view representations convert 3D data to a collection of 2D images, thus making them compatible with existing methods for robustness analysis of image clas-sifiers (e.g. ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Word distance benchmark More surprising perhaps is the relatively strong performance of the word distance benchmark, particularly relative to the frame-semantic benchmark, which we had expected to perform better.",
+                    "annotation_spans": [
+                        {
+                            "start": 146,
+                            "end": 170,
+                            "text": "frame-semantic benchmark",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 23,
+                            "text": "Word distance benchmark",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 115,
+                            "text": "word distance benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In any case, preliminary evaluation of \u03b4 k or \u2206 k values provides a basis for excluding values of k for which significant evidence of cluster structure in the dataset appears unlikely.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Beyond the empirical validation of this heuristic, we also share our improved TrecQA dataset with the community to support further work in answer selection.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 92,
+                            "text": "TrecQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report the IoD averaged across all ground-truth intervals in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As an example, in  it was observed that Gaussian data augmentation and adversarial training improve robustness to noise and blurring corruptions on the CIFAR-10-C and ImageNet-C common corruption benchmarks , while significantly degrading performance on the fog and contrast corruptions.",
+                    "annotation_spans": [
+                        {
+                            "start": 152,
+                            "end": 206,
+                            "text": "CIFAR-10-C and ImageNet-C common corruption benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We randomly selected 1% samples as the training set for Iris flower dataset and Yale Face dataset and run random trials of classification multiple times to report the average accuracy for each method.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 75,
+                            "text": "Iris flower dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 80,
+                            "end": 97,
+                            "text": "Yale Face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the spam filtering technique described by Cormack et al. , which assigns a \"spamminess\" percentile S(d) to each document d in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first report activity recognition accuracy on the FPHA-HO dataset and compare our results to the state-ofthe-art results of  in .",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 68,
+                            "text": "FPHA-HO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the larger Nature corpus, online LDA finds a solution as good as the batch algorithm's with much less computation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, for the Cn data set, which has a very low classification accuracy for the outlier class, using OF still achieves close to 50 % accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 32,
+                            "text": "Cn data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We hypothesize that this robustness is due to the short temporal span of the Tweets2011 corpus (two weeks).",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 94,
+                            "text": "Tweets2011 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "provides the comparative results obtained by our model using the four class hierarchies on the large-scale ImNet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 120,
+                            "text": "ImNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "use large-scale RGB datasets to pre-train a prior model and employ depth-induced features to enhance the network.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 28,
+                            "text": "RGB datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Nevertheless, soliciting comparison labels poses a significant challenge, as the number of potential comparisons is quadratic in the dataset size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We split our query collection (see Sec. 3) into a 61,000 query training-set, a 14,000 query validation-set and a 61,000 query test-set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset instance, we define a pointer r to the visual region which the question or answer refer to, and measure the model's visual attention (probability) over that region.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the table, we can notice that our proposed method outperforms the competing methods on these benchmark datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The document collections were created by crawling the main websites of the institutions with Nutch 2 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the third use case is about building a test collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that in open set recognition scenario, there exist outliers that do not belong to the classes in the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results on three separate datasets: FACES 1 with 1720 images, 4 unique people and 100 training images in every fold, FACES 2 with 245 images, 9 unique people and 50 training images, and FACES 3 with 352 images, 24 unique people and 70 training images.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 53,
+                            "text": "FACES 1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 127,
+                            "end": 134,
+                            "text": "FACES 2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 196,
+                            "end": 203,
+                            "text": "FACES 3",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We discover several interesting anomalies in the Facebook data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 66,
+                            "text": "Facebook data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "presents the results of the Content Extraction via Text Density with DensitySum (CETD-DS) method when given the task of extracting contents from the CleanEval, Big 5 and the Chaos data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 149,
+                            "end": 189,
+                            "text": "CleanEval, Big 5 and the Chaos data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the expectation, we use the empirical mean of the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In supervised learning, both of these problems are believed to be due to a limited data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our final goal is thus to design an intelligent partitioning technique that allows to mine small subsets of the original datasets entirely in main memory, and a merging strategy able to derive the whole collection of closed itemsets from the local results obtained from each partition.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On average, our strategies converge at about 15 iterations on the LETOR datasets, and around 5 to 10 iterations on the multi-relevance judgment datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 80,
+                            "text": "LETOR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Tab. 1, we compare our method with several state-ofthe-art methods in the shape classification results on both ModelNet10 and ModelNet40 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 114,
+                            "end": 148,
+                            "text": "ModelNet10 and ModelNet40 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We outperform STL method and the baseline method for the computer dataset while perform better/equal on the school dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 73,
+                            "text": "computer dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 108,
+                            "end": 122,
+                            "text": "school dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On SMTP and Shuttle datasets, improvements are minor, since performance of unsupervised method is already very high.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 28,
+                            "text": "SMTP and Shuttle datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "More recently,  presented RISE (Reproducible Information Retrieval System Evaluation), a Web-based service (built on top of a modi ed version of the Indri toolkit) that implements more than 20 state of the art retrieval functions, and evaluates them over 16 standard TREC collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 267,
+                            "end": 283,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method produces superior results to the state-of-the-art approaches for single-view and multi-view learning-based depth estimation on the KITTI driving dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 142,
+                            "end": 163,
+                            "text": "KITTI driving dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(K \u00b7 n t ) is the total number of images in the stronglylabeled data set, and is the collection of n s images retrieved by Bing using the category name of the k-th class as keyword.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Sarwar et alia  collected a 93.7% empty matrix containing ratings of 1650 movies on a 1-5 scale by 943 individuals, and split it 80%/20% into training and test sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also assign a binary label Yc to each candidate c for each object v in validation set V, indicating whether c is a relevant recommendation for v (Yc=1) or not (Yc=0), based on the contents of Yv.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data, including exemplars from the old classes and samples from the new classes, are split into a training set for the first stage and a validation set for the second stage.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a reference, using d = 10 and K = 3 on the AT&T dataset, LMNN learns a metric in about 5 seconds, while LMCA and KLMCA converge to a minimum in 21 and 24 seconds, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 58,
+                            "text": "AT&T dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We introduced additional uncertainty into the data sets by adding noise which was drawn from a uniform probability distribution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to check for possible variations due to the collection type, we plot the iteration-MAP curve for two similar collections, i.e. SJMN and TREC7-8, and a web collection, WT10g.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We achieve the state-of-the-art performance on the standard benchmarks including Office-31 and VisDA-C datasets by integrating our framework with two recent domain adaptation techniques.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 111,
+                            "text": "Office-31 and VisDA-C datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the LiveJournal dataset, MSEIGS-Early is almost 8 times faster than EIGS while attaining similar performance as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 27,
+                            "text": "LiveJournal dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Detecting Anomalous Collections in IMDB.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , the LL+All method, which is our final modification to the log-logistic model, outperforms both baselines in all collections in terms of MAP and P@10.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform experiments measuring the classification accuracy of our system in a strictly class-incremental setup on the following benchmark datasets: MNIST , SVHN , CIFAR-10 , and ImageNet-50 .",
+                    "annotation_spans": [
+                        {
+                            "start": 150,
+                            "end": 155,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 158,
+                            "end": 162,
+                            "text": "SVHN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 165,
+                            "end": 173,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 180,
+                            "end": 191,
+                            "text": "ImageNet-50",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We replicate the experimental setup of , who curated 100 images from the PASCAL VOC 2012 dataset, and manually provided scribbles on objects contained in them.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 96,
+                            "text": "PASCAL VOC 2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At the same time, the experimenter wishes to augment the dataset by adding comparison labels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to the increasing interest on analyzing users' opinions some researchers created collections that can be used for a number of different problems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, the original dataset contains 16 823 unique ingredients, which we preprocess to reduce its size and complexity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also tested the AMC-SSDA as pre-processing step in an image classification task by corrupting MNIST database of handwritten digits  with various types of noise and then denoising and classifying the digits with a classifier trained on the original images (Section 4.2).",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 111,
+                            "text": "MNIST database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the DBLP data set, we selected 1000 real queries from the logs of our deployed systems and each query contained 1-6 keywords 7 .",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 21,
+                            "text": "DBLP data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hashing-based approximate nearest neighbour (ANN) search has emerged as an effective technique for efficiently finding nearest neighbours in large multimedia data collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following previous works , for the depth, optical flow and segmentation tasks, we train our networks using all of the raw data in KITTI excluding the scenes appeared in the training set of KITTI 2015 , which we adopt as our validation set and use to compare with other methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 189,
+                            "end": 199,
+                            "text": "KITTI 2015",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Third, the INFOCOM dataset represents the physical proximity interactions between 78 students at the 2006 INFOCOM conference, recorded by wireless detector remotes given to each attendee .",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 26,
+                            "text": "INFOCOM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In brief, it quantifies the accuracy of a given relative validity criterion according to whether or not it identifies the correct number of clusters for a dataset, ignoring completely relative qualities of the partitions under evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments showed that it performs equivalently to the existing dependence models on newswire test collections and outperforms the latter on web data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, we evaluate the performance of state-of-the-art ReID methods on CityFlow-ReID, which is the subset of our benchmark for image-based ReID mentioned in Section 3.3.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 84,
+                            "text": "CityFlow-ReID",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "address the domain shift between various face detection datasets by recalibrating the final classification layer of face detectors using a residual-style layer in a low-shot learning setting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, for the DS2 dataset with equal lengths, setting w = 1, r = 0%, and k = 256, we see that running time is 293, 498, 852 and 958 seconds for l = 1, 2, 5, and 10, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 32,
+                            "text": "DS2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the MQ2008 dataset has much smaller query and document size, we find it is not sufficient to train deep models purely based on this dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 28,
+                            "text": "MQ2008 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, we check whether the answer occurs at least once in relation with the question's subject, across the whole dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed tracker performs favorably against the state-of-the-art trackers on this dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This exception occurs because there are many unseen relevant documents in the TD2004 dataset and, when the user model assumptions do not hold, the baseline's condensing strategy  may be more effective because it does not rely on these assumptions.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 92,
+                            "text": "TD2004 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A test set of the 100 pages retrieved for each of 30 further person names was also created and annotated.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "propose an innovative dataset for instance-level human analysis, DensePose-COCO, a large-scale ground-truth dataset with image-tosurface correspondences manually annotated on 50k COCO images.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 79,
+                            "text": "DensePose-COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the motor cortical and hippocampal datasets, DCA outperformed PCA at predicting both current and future behavioral variables on held-out data .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 6 thus discusses how to prune partitions and determine their sizes before subdividing the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have compared these methods on two email corpora, TREC 2007",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 62,
+                            "text": "TREC 2007",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments conducted on the IAM dataset showed that the method outperformed the classical HMM approach.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 40,
+                            "text": "IAM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we train networks to classify images of handwritten digits obtained from the MNIST dataset (figure-7e).",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 96,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments show scalability results for kernel density estimation on a synthetic ten-dimensional dataset containing over one billion points and a subset of the Sloan Digital Sky Survey Data up to 6,144 cores.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, this decomposition across users makes GPFM feasible for large-scale datasets as we will see in Section 4.1.3.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the generated images of a subject in the testing set, using the trained network on the reduced dataset, as well as using the complete dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GAN    : Log-likelihood estimates for a standard GAN and our proposed LAPGAN model on CI-FAR10 and STL10 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 94,
+                            "text": "CI-FAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 99,
+                            "end": 113,
+                            "text": "STL10 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show the resulting log joint likelihoods on the three datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that here, the Web collections are larger than the newswire collections and are in general noisier.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 38,
+                            "text": "Web collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MAGSAC led to the most accurate results for kusvod2 and Multi-H datasets and it was the third best for AdelaideRMF",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 72,
+                            "text": "kusvod2 and Multi-H datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For real data experiement, we first applied D-PPCA for SfM on the Caltech 3D Objects on Turntable dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 105,
+                            "text": "Caltech 3D Objects on Turntable dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The new method produced good results on the Accident dataset investigated here, as well as in simulations.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 60,
+                            "text": "Accident dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here \"Hotness\" is measured by # of related tweets (retrieved by manually generated query keywords) in our Twitter dataset and not part of the timeline.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 121,
+                            "text": "Twitter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection \u03c4 may include all the 3-cliques in G, or a subset, as we explain in Sec. 3.3.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All benchmarked models are evaluated on three different versions of the MNIST dataset, each containing 12000 training and 50000 test images.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 85,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Minimum Set Coverage problem, given a set S and a collection C of subsets of S such that \u222a c\u2208C c = S, the objective is to find a sub-collection A \u2286 C with the minimum cardinality such that \u222a c\u2208A c = S.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(topics 301-450), further improvement of its performance and that of our combined model may also be possible for W10g and GOV2 collections via collection-specific model tuning.",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 138,
+                            "text": "W10g and GOV2 collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SVS dataset includes 175 stereoscopic video sequences with resolution 1920 \u00d7 1080, and each stereoscopic sequence is composed of left and right views.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "SVS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, using our new algorithm that significantly improves the speed of the neighborhood kernels, we show that with larger number of allowed mismatches the neighborhood can perform even better than the stateof-the-art profile kernel: the (7,3)-mismatch neighborhood achieves the average ROC-50 score of 86.32, compared to 84.00 of the profile kernel on the Swiss-Prot dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 359,
+                            "end": 377,
+                            "text": "Swiss-Prot dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Four UCI datasets were tested.",
+                    "annotation_spans": [
+                        {
+                            "start": 5,
+                            "end": 17,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It would appear this is because almost all collections base their design on the Cranfield model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "our base model outperforms all the state-of-the-art methods in Sketchy-Extended Dataset; (ii) our model performs the best overall on each metric and on almost all the datasets; (iii) the gap between our model and the state-of-the-art datasets is almost double in Sketchy-Extended Dataset; (iv)",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 87,
+                            "text": "Sketchy-Extended Dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 263,
+                            "end": 287,
+                            "text": "Sketchy-Extended Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moving the points in the gradient directions torques the learned hyperplane away from the optimal hyperplane for the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the BookCrossing dataset, MudRecS achieves an RMSE score of 0.45, which indicates that the error in rating assignment is less than half a star away from the user-/expert-assigned, actual rating.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 27,
+                            "text": "BookCrossing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments have been performed to evaluate the performance of CBRS against the baselines using a music dataset from last.fm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The model is then inflated into 3D ConvNet as proposed in  (I3D), and pretrained on Kinetics dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 100,
+                            "text": "Kinetics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the proposed tracker on five benchmark datasets including OTB-2013 , OTB-2015 , VOT-2015 , VOT-2016 , and Temple Color-128 .",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 78,
+                            "text": "OTB-2013",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 81,
+                            "end": 89,
+                            "text": "OTB-2015",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 100,
+                            "text": "VOT-2015",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 103,
+                            "end": 111,
+                            "text": "VOT-2016",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 118,
+                            "end": 134,
+                            "text": "Temple Color-128",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To gain some experimental support for this intuition we performed the following experiment: we considered the topics 701-850 from the TREC GOV2 collection, and built queries using the words in their title.",
+                    "annotation_spans": [
+                        {
+                            "start": 134,
+                            "end": 154,
+                            "text": "TREC GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The current best performing ranking algorithms, such as BM25 and the language models, all use collection statistics, such as the IDF of query terms, the collection size, and the average document length.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our large experiment, we evaluate across all 26, 000 frames in the 26-video dataset, without adding synthetic effects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we use it to evaluate SSL algorithms by using fractions of the standard TIMIT training set, i.e., simulating the case when only small amounts of data are labeled.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 96,
+                            "text": "TIMIT training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This approach will be extended to incrementally updated data sets in section 3.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On our ODP dataset, 11,395 features are selected out of a vocabulary of 14 million terms.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 18,
+                            "text": "ODP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The effectiveness validation is performed on three datasets, CIFAR-10, Street View House Numbers (SVHN), and Ima-geNet ILSVRC 2012.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 69,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 71,
+                            "end": 96,
+                            "text": "Street View House Numbers",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 109,
+                            "end": 130,
+                            "text": "Ima-geNet ILSVRC 2012",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "x and the hinge loss (a, y) = max{0, 1 \u2212 ya} to several binary classification datasets (ADULT, GISETTE, IJCNN, MUSHROOMS, PHISHING and SPLICE).",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 93,
+                            "text": "ADULT",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 95,
+                            "end": 102,
+                            "text": "GISETTE",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 104,
+                            "end": 109,
+                            "text": "IJCNN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 111,
+                            "end": 120,
+                            "text": "MUSHROOMS",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 130,
+                            "text": "PHISHING",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 135,
+                            "end": 141,
+                            "text": "SPLICE",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we also utilize the publicly available AOL dataset in our second experiment.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 63,
+                            "text": "AOL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We supplement this with an account of available datasets and packages plus a live demo based on these.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our algorithm offers accuracy similar to prior methods on sparse or homogeneous datasets such as the NGSIM dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 114,
+                            "text": "NGSIM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consequently, the resultant PAC-Bayes bound may have several local minima for certain data sets-thus giving an intractable optimization problem in the general case.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate this process on a single onedimensional attribute from the Wine Quality dataset , which is later used for experimental results (see .",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 94,
+                            "text": "Wine Quality dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "computing SIFT descriptor vectors at those keypoints, and (iv) clustering the entire collection of SIFT descriptors into groups whose centers will define the visual words.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "test dataset, winning the 1st place in COCO 2018 Challenge DensePose task by a very large margin.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Emotion recognition In this section we experiment on the AFEW dataset , which consists of videos depicting 7 classes of emotions",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 69,
+                            "text": "AFEW dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "with the publicly available Waterloo Spam Ranking for the ClueWeb09 dataset, which assigns a \"spamminess\" percentile S(d) to each document d in the collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 75,
+                            "text": "ClueWeb09 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Learning to Ranking Challenge 2 data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 41,
+                            "text": "Ranking Challenge 2 data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training set is used to learn the feature and classifier layers, and the validation set is used to learn the bias correction layer.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first dataset contains instances of 8 basic physical activities (e.g. walking, running, going up/down stairs, going up/down elevator, sitting, standing, and brushing teeth) from 7 different users.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ClueWeb Wikipedia Dataset for Federated Search.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 25,
+                            "text": "ClueWeb Wikipedia Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Spatial keyword search is an important functionality in exploring useful information from a corpus of geodocuments and has been extensively studied for years .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unsupervised RE-ID refers to that the target dataset is unlabelled but the auxiliary source dataset is not necessarily unlabelled .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, as explained in Section 4.2.1, we use k-Space to compute V and then use regression on V (in one dataset we also add an additional ridge regularization).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this case, to fulfill memory constraints, it is however possible to partition the set of counters, and perform more scans of the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ClueWeb09 collection was used in 2011 and 2012, and the ClueWeb12 collection in 2013 and 2014.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 24,
+                            "text": "ClueWeb09 collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 60,
+                            "end": 80,
+                            "text": "ClueWeb12 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the dataset is sorted by the KL-divergence, and subsequently split into three groups by empirically determining cutting points where shadows appear sharp (corresponding to a softness of 1, or sunny skies) and where shadows are not visible (corresponding to a softness of 3, or overcast skies).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For clarity, let us consider a feature distribution matrix over a finite dataset, where each row is a feature, each column is an example, and each entry f j is the activity of feature j on example i.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the clustering quality of HPStream and HDDStream on the Network Intrusion Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 91,
+                            "text": "Network Intrusion Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, this may be less effective with a dynamically changing document collection, and is unnecessary with our approach because global statistics are derived from a large proportion of the document collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consequently, although CUB-200-2011 is not a large-scale dataset such as ImageNet-1k, this is a particularly challenging dataset to conduct WSOL.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 35,
+                            "text": "CUB-200-2011",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 84,
+                            "text": "ImageNet-1k",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first, action group (8 subjects) was asked to recognize the actions in the image and indicate them from the labels provided by the PASCAL VOC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 135,
+                            "end": 153,
+                            "text": "PASCAL VOC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also consider a list of manually selected datatype properties (i.e., label, title, name, full-name, family-name, given-name, has-pretty-name, prefLabel, given-name, nickname, which frequently occur in various LOD datasets) that point to a label or textual description of the entity.",
+                    "annotation_spans": [
+                        {
+                            "start": 212,
+                            "end": 224,
+                            "text": "LOD datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the difference in both size and distributional properties between these collections are far greater.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The diversity of images of the Aesthetics dataset paired well with these pre-trained weights up to the top fully connected layers, so we only initialized at random the last output layer.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 49,
+                            "text": "Aesthetics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To be specific, the query image is a normal ground-level image (e.g., a street view image taken by a tourist) whereas the database images are collections of aerial/satellite images covering the same (though 33rd Conference on Neural Information Processing Systems (NeurIPS 2019), Vancouver, Canada.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Generally, a popularity score is assigned to each query based on the frequency of the query in the query log from which the query database was built.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and  show the number of sent emails and the number of active people in this collection respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As for the edge detection task, we evaluate the edge probability map using F-measure of both Optimal Dataset Scale (ODS) and Optimal Image Scale (OIS).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Variation of the performance with n is not shown for the computer dataset because computer dataset has only 20 examples per task.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 73,
+                            "text": "computer dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 82,
+                            "end": 98,
+                            "text": "computer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on both synthetic data and various types of public benchmark datasets show that the proposed framework effectively captures the multi-class label correlation and significantly outperforms existing state-of-the-art baseline methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From this figure, we can see that the performance obtained by the empirical setting is comparable to the retrieval performance obtained by the optimized parameter settings on all datasets used.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compose a hybrid dataset of human shape pairs from SMPL , TOSCA and SPRING ; see the last three columns of  for examples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we apply these convergence rates related to computational errors to establish high-probability generalization bounds for the model trained by SGD through multiple passes over the training examples, which is a typical way of using SGD to process large datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experimented with the Network Intrusion and the Forest Cover Type datasets, which are typically used for the evaluation of stream clustering algorithms e.g.,  and they were also used in the experiments of HP-Stream .",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 77,
+                            "text": "Network Intrusion and the Forest Cover Type datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The AOL dataset does not have any information about users age groups but users in the Bing dataset were signed-in when issuing their queries, and their age information were collected from their profiles at the time.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "AOL dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 98,
+                            "text": "Bing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The weights of the backbone part are initialized with the publicly released ResNet model pre-trained on the ImageNet dataset , and the weights of the remaining part are initialized from the zero-mean Gaussian distribution with \u03c3 = 0.01 and as in He et al. .",
+                    "annotation_spans": [
+                        {
+                            "start": 108,
+                            "end": 124,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After careful dataset selection , we considered the two moons toy problem (see ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Slim code tables attain very high compression ratios; in particular on large and dense datasets, we obtain tens of percentages better compression than Krimp, while considering orders-of-magnitude fewer candidates.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore from 3, 215, 171 permalinks (post-pages) in the Blogs06 collection we selected 1, 754, 334 post-pages.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 76,
+                            "text": "Blogs06 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The datasets from  used were originally based on the CMU Visual Localization dataset  and the the RobotCar dataset  respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 84,
+                            "text": "CMU Visual Localization dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 98,
+                            "end": 114,
+                            "text": "RobotCar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training procedure for solving the SVC is usually based on Quadratic Programming (QP) which presents some inherent limitations, mainly the computational complexity and memory requirements for large training data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the execution time normalized by the execution time of NG for each dataset (see column T n (s) in ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": CCA discovered ranking function for the WBR99 collection SIGIR 2007",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 69,
+                            "text": "WBR99 collection SIGIR 2007",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A fine-tuning is carried out using the individual public dataset before evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Clearly, our method yields a better set of identified domains, which are always better than the original datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SCR and GSDT do not perform as well as 1NN on this data set for early prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To refine the raw datasets, we first analyze the coverage of different features in these two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We provide a new benchmark dataset by extending the well-known FAUST dataset, which contains six resolution shapes with different connectivities and transformations.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 76,
+                            "text": "FAUST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the an example precision matrix of one of K Gaussian components that are learned by the methods of PLE, Sparse-GMM, and Sparse-GMM(G) on the face dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 159,
+                            "text": "face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One can observe that though the parameters change dramatically in wide ranges, the AUC measure only changes about 1% -5% for most of the data except for the heart, PROSTATE, and yaleB data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 157,
+                            "end": 193,
+                            "text": "heart, PROSTATE, and yaleB data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the relevance feedback in an optimization framework, and tends to minimize the diver-gence between the feedback topic model and the feedback documents, and in the same time maximize the divergence between the feedback topic model and the background collection model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In all cases, we show an improved performance in comparison to methods with a similar level of supervision, and performance that is on par with the best directly supervised methods on KITTI and Make3D datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 184,
+                            "end": 209,
+                            "text": "KITTI and Make3D datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All these datasets, described in Supplementary Material G, can be download from the LIBSVM website .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform empirical experiments on a real-world dataset crawled from Instagram to corroborate the efficacy of the proposed framework.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The verticals are created either by classifying items in the web collections into different genres (e.g. Blog, News) or by adding items from other multimedia collection (e.g. Image, Video).",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 76,
+                            "text": "web collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All alternative networks are trained on the same dataset, and we report the accuracy on our test dataset for comparison.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Implementation Details Sampling distribution in Netflix and Yahoo Music Datasets Experiments using Random Graph Models Threshold for Complete Cascading Netflix Challenge Dataset",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 80,
+                            "text": "Netflix and Yahoo Music Datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 152,
+                            "end": 177,
+                            "text": "Netflix Challenge Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Examining performance for all the methods across the three datasets, the methods performed best on ACE, worst on LGL, and in the middle for CLUST.",
+                    "annotation_spans": [
+                        {
+                            "start": 140,
+                            "end": 145,
+                            "text": "CLUST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 99,
+                            "end": 102,
+                            "text": "ACE",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 113,
+                            "end": 116,
+                            "text": "LGL",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the similar procedure of Adaboost algorithm , we introduce a distribution of weights Dt to indicate the retrieval difficulty of the instances in the training data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The algorithm is evaluated on challenging synthetic and real datasets, outperforming existing approaches and reliably converging to the global optimum.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use a standard public dataset, the MSRC 21-class Image Dataset , to perform experimental evaluations for the HIM.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 65,
+                            "text": "MSRC 21-class Image Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We performed a simple classification experiment on this dataset, using the one nearest neighbor algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(2) On each dataset, the average Kendall's \u03c4 for top 50 is 0.77-0.92, which indicates that LSR does not maintain the relative rank of JSR, even for top 50.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method also shows real-time performance on the Cityscapes dataset, but at a much higher accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 69,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next we present the key zero-shot results for our method applied to three challenging datasets using over 250 real attribute classifiers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the high computational costs of the GP model has constrained its applications over large-scale data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The graph for the MB2012 collection, which was not constructed using the EaaS model, is different from the others.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 35,
+                            "text": "MB2012 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "No additional improvement is observed on the COCO benchmark by further replacing the regular conv layers in the conv2 stage.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 59,
+                            "text": "COCO benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first three are neuron image segmentation datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The authortopic model assumes that each author in the document collection is represented by a distribution over topics, and each word is associated with two latent variables: an author and a topic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While prior work has largely focused on specific techniques for query reformulation, in Section 3, we first build an oracular query formed from known relevance judgments for the CLEP-IP 2010 prior art test collection  in an attempt to derive an upper bound on performance of standard Okapi BM25 and Language Models (LM) retrieval algorithms for this task.",
+                    "annotation_spans": [
+                        {
+                            "start": 178,
+                            "end": 216,
+                            "text": "CLEP-IP 2010 prior art test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, our approach significantly improves the performance on Actor 3 in the Campus dataset and Actor 2 in the Shelf dataset, which suffer from severe occlusion.",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 132,
+                            "text": "Shelf dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 85,
+                            "end": 99,
+                            "text": "Campus dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Contrastive divergence training was done on the dataset accompanying their code, which comprises 10240 16 \u00d7 16 color patches randomly extracted from the Berkeley dataset and statistically whitened.",
+                    "annotation_spans": [
+                        {
+                            "start": 153,
+                            "end": 169,
+                            "text": "Berkeley dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The constraints were generated as follows: for each constraint, we picked out one pair of data points randomly from the input data sets (the labels of which were available for evaluation purpose but unavailable for clustering).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the TREC Adhoc T07 and T08 collections: 528,155 documents made up of four TIPSTER sub-corpora: Foreign Broadcast Information Service (TIPFBIS, 130,471 documents); Federal Register (TIPFR, 55,630 documents); Financial Times (TIPFT, 210,158 documents); and Los Angeles Times (TIPLA, 131,896 documents).",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 46,
+                            "text": "TREC Adhoc T07 and T08 collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 171,
+                            "end": 187,
+                            "text": "Federal Register",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 111,
+                            "end": 169,
+                            "text": "Broadcast Information Service (TIPFBIS, 130,471 documents)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 171,
+                            "end": 213,
+                            "text": "Federal Register (TIPFR, 55,630 documents)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 263,
+                            "end": 307,
+                            "text": "Los Angeles Times (TIPLA, 131,896 documents)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 215,
+                            "end": 257,
+                            "text": "Financial Times (TIPFT, 210,158 documents)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our approach is complementary in that we leverage unlabelled, but real-world YouTube videos from the Kinetics dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 101,
+                            "end": 117,
+                            "text": "Kinetics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the dataset consists of complete feature trajectories (incomplete trajectories were removed manually to form the dataset), we select \u03c1 fraction of feature points across all frames uniformly at random and remove their x \u2212 y coordinate values.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our empirical evaluation using several real-world high-dimensional cancer gene expression survival benchmark datasets, our model attains very competitive Cindex values and outperforms most of the competing methods available in the literature of survival analysis.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the proposed ranker reveals that incorporating the moments, e.g. variance, into document ranking provides an alternative way to \"correct\" the estimation without using collection data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the algorithm on a large number of datasets and compared it to existing, state-of-the-art methods, showing both improved accuracy and runtime.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition to these tasks, the BDG-database was used to compare the DDA algorithm to other approaches.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 44,
+                            "text": "BDG-database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In each data set, documents are randomly split into training and testing sub-sets with the ratio 4 : 1, then classified by the linear support vector machine classifier LIBLINEAR .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This seems to be in line with the hypothesis formulated in , for which dependence models may yield larger improvements for large collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For evaluatation, we sampled the image pairs from these datasets and transferred the attribute of target image to the source image as shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Alternating optimization approaches have scalability issues since the number of matrix multiplications and convex optimization steps in each iteration depends both on the data set size and its dimensionality.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Considering temporal effects enabled MNB-W and MNB-S to perform better than MNB on the TV data set; however, increasing the number of training tweets is not effective for achieving their higher classification accuracy, as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 98,
+                            "text": "TV data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Reuters-21578 corpus also has hierarchical structure.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 20,
+                            "text": "Reuters-21578 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Their accuracy is significantly worse on realistic full resolution datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Dual-Route Cascade model (DRC) , the lexical route is implemented as an Interactive Activation  system, while the non-lexical route is implemented by a set of grapheme-phoneme correspondence (GPC) rules learned from a dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For MovieLens experiments we use the largest of the available datasets MovieLens-10M with 10 million ratings from 72,000 users on 10,000 movies.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 84,
+                            "text": "MovieLens-10M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is in contrast to other strategies (e.g., Apriori ), which compute all rules from the training set beforehand (i.e., offline), possibly including rules that might not be useful when recommending for objects in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the RGB and depth modalities are well-aligned in the VIVA and EgoGesture datasets, but are not completely aligned in the NVGestures dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 91,
+                            "text": "VIVA and EgoGesture datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 131,
+                            "end": 149,
+                            "text": "NVGestures dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we propose an adversarial learning network for the task of multi-style image captioning (MSCap) with a standard factual image caption dataset and a multistylized language corpus without paired images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We verify the efficacy of the proposed method on two UCI data sets by comparing the evaluation results of random sampling and sampling strategy suggested by the analysis.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 66,
+                            "text": "UCI data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The TAC 2014 collection has about one million newswire articles and some entity linking annotations that specify the entity IDs of mentions in a knowledge base.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "TAC 2014 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is relevant because recent results have shown that even for linear regression, different datasets cannot be easily pooled in simple ways .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the SIFT data set (n = 10 6 and d = 128), the cross-polytope LSH achieves a modest speed-up of 1.2\u00d7 compared to the hyperplane LSH (see ).",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "SIFT data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is publicly available but has no metadata (e.g. item descriptions, categories) that is needed to interpret a model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is good news for IR test collection builders and users.',",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the testing phase, given a novel class, we randomly select 500 images (no overlap with the training set) from it as the positive examples and randomly select 5 images from each base class of the ImageNet validation set as negative samples.",
+                    "annotation_spans": [
+                        {
+                            "start": 198,
+                            "end": 221,
+                            "text": "ImageNet validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On very large datasets it is typically unrealistic to use the polynomial kernel on the entire dataset, and approximation techniques, like the ones we suggest, are necessary.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A dataset in ADR is partitioned into a set of chunks to achieve high bandwidth data retrieval.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Most o en, test collections are built using the pooling method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1) The action detection datasets are comprised of untrimmed video samples, and the goal is to recognize and localize the action instances on temporal domain  or spatial-temporal domain .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Charades is multi-label, action classification, video dataset with 157 classes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "PIX10P. is publicly available from https://featureselection.asu.edu/datasets.php",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 6,
+                            "text": "PIX10P",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There is no benchmark dataset for difference summarization of multilingual news, so we built the evaluation dataset as follows:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The work of  is the most related to ours in this group, as they warp the individual frames of the multi-view video dataset according to the target pose to generate new sequences.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the main aim of this paper is to demonstrate that reservoir computing can yield a good acoustic model, we will conduct experiments on TIMIT, an internationally renowned corpus  that was specifically designed to support the development and evaluation of such a model.",
+                    "annotation_spans": [
+                        {
+                            "start": 140,
+                            "end": 145,
+                            "text": "TIMIT",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments are conducted by using publicly available LtR datasets: the MSN 1 and the Yahoo!",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 77,
+                            "text": "MSN 1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 92,
+                            "text": "Yahoo!",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Few of the problems associated with searching dynamic collections are well understood, such as defining time-sensitive relevance, understanding user query behavior over time and understanding why certain web content changes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MegaFace dataset is divided into two subsets: (1) the gallery set containing more than 1 million images from 690K identities, and (2) the probe set which is composed of two existing datasets:",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "MegaFace dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, SPLD achieves the best MAP so far reported in literature on the Hollywood2 and Olympic Sports datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 116,
+                            "text": "Hollywood2 and Olympic Sports datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i for object i. Note that the centroids are generally not objects in the original dataset,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The LastFM dataset contains 1892 users and 17632 items (artists).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "LastFM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(see text for the shadow softness definition used), and over the entire dataset (\"all\").",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Material classification: For this task, we used the KTH-TIPS2b dataset , which contains images of 11 materials captured under 4 different illuminations, in 3 poses, and at 9 scales.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 70,
+                            "text": "KTH-TIPS2b dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results for different evolution thresholds for the DBLP and Citation data sets are illustrated in  and (e) respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 82,
+                            "text": "DBLP and Citation data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, the table is broken into 3 blocks: baselines, new heuristics and L2R based methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Let \u03c4 denote a collection of m 3-cliques in G, m \u2264 n 3 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We designed a neural network as a CNN-based autoencoder structure for all two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "CS 15k consists of 15,086 terms from the list of keywords with document frequency between 20 and 13,226 (the maximum document frequency), and terms not in this set occurred in less than 0.026% (20/77,841) of the crawled Intranet collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 220,
+                            "end": 239,
+                            "text": "Intranet collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each participant, we randomly select an existing user from the review datasets, and present this user's previous reviews to the participant to read.",
+                    "annotation_spans": [
+                        {
+                            "start": 67,
+                            "end": 82,
+                            "text": "review datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on the large-scale Visual Genome dataset demonstrate the superiority of the proposed method over current state-ofthe-art competitors.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 62,
+                            "text": "Visual Genome dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1) To address the blur issue, is it possible to sample a collection of high quality proposals conforming to possible frame distribution at following time stamp, and select the final prediction from it?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the expert should generate that will better augment the existing dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The challenge we are facing is how to estimate a Cross Term from a document collection, and integrate the Cross Term into a probabilistic weighting model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "presents the segmentation accuracy, the  : Illustrations of the effectiveness of pixel-wise and structured distillation schemes in terms of class IoU scores on the network MobileNetV2Plus  over the Cityscapes test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 198,
+                            "end": 217,
+                            "text": "Cityscapes test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We discover the underlying domains of the training datasets, each of which will be adaptable, whereas the landmarks in  are intentionally biased towards the single given target domain.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Large differences in these results may be taken as evidence in support of cluster structure, and this idea may be used to determine the most probable number of clusters present in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset could potentially support researches such as web graph analysis, spam detection and etc.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With extensive evaluation on popular benchmark datasets, we confirm a significant improvement over other state-of-the-art methods in both typical and generalized zero-shot recognition.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, a new reverse KL-divergence training criterion which yields the desired behaviour of Prior Networks and allows them to be trained on more complex datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GCRF train and test data: We use the previously described synthetic graph as the training data set for all the models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One researcher noted I1: \"for me it starts with developing the research questions and data collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given large scale manually labeled image datasets, e.g. ImageNet, ConvNets can be well trained by back propagation and achieve state-of-the-art performance on many tasks such as image classification  and object detection .",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 64,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Mostly these approaches are validated by adaptating between datasets, which, as discussed above, do not necessarily correspond to well-defined domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation measures, datasets and experiments are described next.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A potential reason is that the dataset is too large (200TB) and it is not a trivial task to process the dataset in laboratory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "[64.6% mAP, 58.4% MOTA] on single scale at \u223c30 FPS, and [71.5% mAP, 61.3% MOTA] on multiple scales at \u223c7 FPS on the PoseTrack 2017 validation set using one GTX 1080 Ti.",
+                    "annotation_spans": [
+                        {
+                            "start": 116,
+                            "end": 145,
+                            "text": "PoseTrack 2017 validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If all event labels occur in the dataset in similar proportions, the worst case complexity of the FBS-IW and FBS-CW algorithms are O(N 2 + M 3 N ) and O(M 3 N 2 ) respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "extended the AT model and proposed the Author-Recipient-Topic (ART) model and the Role-Author-Recipient-Topic (RART) model in order to analyze the Enron e-mail corpus and an academic e-mail network.",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 166,
+                            "text": "Enron e-mail corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The figure shows that when k is small such as 1 and 2, the optimal b is positive for all collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparison with the State-of-the-Arts:  shows the experimental results on the MegaFace dataset compared with the existing deep learning based methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 94,
+                            "text": "MegaFace dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Remarkably, the recall rate@top-1 improves from 22.5% in  (or 40.7% in ) to 89.8% on CVUSA benchmark, and from 20.1%  (or 46.9% in ) to 81.0% on the new CVACT dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 100,
+                            "text": "CVUSA benchmark",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 153,
+                            "end": 166,
+                            "text": "CVACT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given that the default version of the parser we used is trained on news articles, one may reason that its accuracy could improve if we train it on the retrieval collection, or on doc-uments of the same domain.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, structured experiments on a standardized music dataset to investigate the effects of doing so are scarce.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to evaluate how well we reduce the confidence on the outdistribution, we use four datasets on CIFAR-10  and SVHN  (namely among CIFAR-10, CIFAR-100, SVHN, ImageNet-, which is a subset of ImageNet where we removed classes similar to CIFAR-10, and the classroom subset of LSUN",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 111,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 117,
+                            "end": 121,
+                            "text": "SVHN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 164,
+                            "end": 172,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 158,
+                            "end": 162,
+                            "text": "SVHN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 147,
+                            "end": 156,
+                            "text": "CIFAR-100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 137,
+                            "end": 145,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 196,
+                            "end": 204,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 241,
+                            "end": 249,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 279,
+                            "end": 283,
+                            "text": "LSUN",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Except for LWFA, all methods can evaluate a data set in non-iterative calculations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A Gaussian process is a collection of random variables such that any finite subset of these variables conform a joint Gaussian distribution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiment results obtained with the CAL500 dataset shows that the model improves the performance of music search and annotation significantly.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 51,
+                            "text": "CAL500 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On both two levels of network capability, our methods attains the highest performance for each datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a dataset satisfying Strict Threshold Separation, there exists an algorithm which can find the target partitioning for any hypothesis class in O(log(n))",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, two of our generated data sets with 20, 000 data points and 20 and 25 dimensions are denoted by Syn20.D20K and Syn25.D20K respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We remark that there is no publicly available dataset providing user session data, document relevance and query-document pairs",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Multiple attempts have been made to mitigate the systematic biases of VQA datasets as discussed in section 1 , but they fall short in providing an adequate solution: Some approaches operate over constrained and synthetic images , neglecting the realism and diversity natural photos provide.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 82,
+                            "text": "VQA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Negative log-likelihoods per symbol (the base oflogarithm is always taken to be the number of symbols in the alphabet) of the test set computed using the fitted models exhibited a steplike increasing tendency shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show a sample time series for the stock dataset and its quantized version using 1-bit and 4-bit MMSE, as well as the absolute error due to quantization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The purpose is not to exclude users that occasionally post in a non-English language, but to make sure the data set does not contain too many strictly non Englishspeaking users.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It also attains state-of-the-art performance on the Visual Genome  and VRD  datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 84,
+                            "text": "Visual Genome  and VRD  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, a sparse data set with over 2 million nonzero entries (Grolier) can be queried in just over 1 second.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on Real Dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence, in the prediction data set, each observation consists of a set of predictor variables or features that represent information about two recipes, and the response variable is a binary indicator with value \"true\" in the case when a was selected over b and the value \"false\" when b was selected over a.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We remove the last softmax classifier layer of ResNet-50 and initialize the rest layers with parameters pretrained in Ima-geNet ILSVRC12 dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 122,
+                            "end": 144,
+                            "text": "geNet ILSVRC12 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the iFDM on two different real databases: the Reference Energy Disaggregation Data Set (REDD) , and the Almanac of Minutely Power Dataset (AMP) .",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 93,
+                            "text": "Reference Energy Disaggregation Data Set (REDD)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 104,
+                            "end": 143,
+                            "text": "Almanac of Minutely Power Dataset (AMP)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As in previous cases, the publication collection of the last year is used for testing purposes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The TREC-4 collection was many times the size of earlier collections, and also more diverse, but despite this the conclusions remained fundamentally the same.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "TREC-4 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also evaluate our segmentation model on two largescale indoor datasets Stanford 3D Large-Scale Indoor Spaces (S3DIS)  and ScanNet .",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 111,
+                            "text": "Stanford 3D Large-Scale Indoor Spaces",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 125,
+                            "end": 132,
+                            "text": "ScanNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that XNV consistently outperforms a state-of-the-art algorithm for semi-supervised learning: substantially improving predictive performance and reducing the variability of performance on a wide variety of real-world datasets, whilst also reducing runtime by orders of magnitude.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore we must add the empty set to the collection of globally closed itemsets only when no item has support equal to |D|.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Lastly, it is important, when evaluating the retrieval score of a document, to weigh down terms occurring in many documents, i.e. which have a high document/collection frequency, as these terms have a lower discrimination power.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We mainly compared ADCrowdNet with the state-of-the-art approach CSRNet  which have demonstrated the best performance on the datasets including the ShanghaiTech, UCF CC 50, the WorlExpo'10, and UCSD datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 148,
+                            "end": 207,
+                            "text": "ShanghaiTech, UCF CC 50, the WorlExpo'10, and UCSD datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the largest real dataset: Climate with more than 10,000 points, QUIC takes more than 10 hours to get a reasonable solution (relative error=0), while DC-QUIC-3 converges in 1 hour.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 37,
+                            "text": "Climate",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each data set had labels which indicate whether each session is anomalous or not.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Its improvements over LeToR are more than 10% on both datasets with only one exception: Precision@1 on New York Times.",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 117,
+                            "text": "New York Times",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Hands2017Challenge is composed from parts of the Big Hand 2.2M dataset  and the First-person Hand Action Dataset (FHAD) , it is currently the largest dataset available.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 74,
+                            "text": "Big Hand 2.2M dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 84,
+                            "end": 116,
+                            "text": "First-person Hand Action Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As we saw in Section 5, for similar datasets, SHLDA's context-specific regression is more useful when global lexical weights do not readily differentiate documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since DCI CLOSED projects the dataset at the first level of the visit, it requires at most (3M ) \u00d7 N bits to run over a dataset D with a minimum support threshold equal to one.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For comparing the effectiveness, the following state-of-theart methods were also evaluated on both CRD and CRD-S datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 121,
+                            "text": "CRD and CRD-S datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider two different visual features: (1) Two-stream RGB features  from the original Charades dataset, which is a frame-level feature from VGG-16  network, we denote the model as MAN-VGG.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 106,
+                            "text": "Charades dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset used in the experiment on natural images was prepared by sampling N = 200 000 patches ofD = 26 \u00d7 26 pixels from the van Hateren image database  (while constraining random selection to patches of images without man-made structures).",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 154,
+                            "text": "van Hateren image database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe the application of the approach to an ischemic stroke domain with clinical trial data (International Stroke Trial Dataset, 1993-1996.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 133,
+                            "text": "International Stroke Trial Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Publicly available, standardized datasets of music listening behavior are rare.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "extended their work and introduced the Procedurally Generating Matrices (PGM) dataset by instantiating each rule with a relationobject-attribute tuple.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 85,
+                            "text": "Procedurally Generating Matrices (PGM) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second data set consists of quantized daily volatility changes of the Dow Jones Industrial Average (DnA).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then one can e.g. use a decision tree A 1 with a chosen hyperparameter configuration \u03bb 1 and train it on the data set D 1 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To analyze impact when most of the sessions are ID and more complex, the MINED dataset is obtained directly from the filtering algorithm by setting the threshold on the Number of Distinct Aspects to be 5.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 86,
+                            "text": "MINED dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Homogr dataset consists of mostly short baseline stereo images, whilst the pairs of EVD undergo an extreme view change, i.e. wide baseline or ex-6 http://cmp.felk.cvut.cz/wbs/ treme zoom.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "Homogr dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Photo Tourism patch dataset 1 , Notre Dame, which contains 104,106 patches, each of which is represented by a 128D SIFT descriptor (Photo Tourism SIFT); and 2) MIR-Flickr 2 , which contains 25,000 images, each of which is represented by a 3125D bag-of-SIFT-feature histogram; For each dataset, we further conduct a simple preprocessing step as in , i.e. mean-centering each data sample, so as to obtain additional mean-centered versions of the above datasets, Photo Tourism SIFT (mean), and MIR-Flickr (mean).",
+                    "annotation_spans": [
+                        {
+                            "start": 160,
+                            "end": 170,
+                            "text": "MIR-Flickr",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 491,
+                            "end": 501,
+                            "text": "MIR-Flickr",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 27,
+                            "text": "Photo Tourism patch dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 460,
+                            "end": 478,
+                            "text": "Photo Tourism SIFT",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 132,
+                            "end": 150,
+                            "text": "Photo Tourism SIFT",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 5, we demonstrate the scalability of our framework on kernel density estimation on both synthetically generated dataset and a subset of SDSS dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 159,
+                            "text": "SDSS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, we have 216,092 geo-tagged photos in our Paris collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 71,
+                            "text": "Paris collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Discriminative also compares favorably with the recent triplet+smart mining method , i.e., on the CAR196 dataset, Discriminative has 3.6% improvement in R@1 over the triplet+smart mining.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 112,
+                            "text": "CAR196 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The comparison of the complexity and error ratios on all the datasets thus suggests that the flat classification strategy may be preferred on IPC, whereas the hierarchical one is more likely to be efficient on the LSHTC datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 214,
+                            "end": 228,
+                            "text": "LSHTC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also conducted search relevance experiments to test the effectiveness of GRLSI and GNMF on another dataset, the Web-II dataset, which is obtained from the same web search engine.",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 129,
+                            "text": "Web-II dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "for short queries at different pruning levels, baseline and different settings (WT2g collection) with df > N/2 is always a good option.",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 95,
+                            "text": "WT2g collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To further demonstrate the generalization capacity of our approach, we conduct experiments on challenging COCO dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 118,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are total 10 common categories among the 4 datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The model can be used for any text collections with specification (key-value) type prior knowledge.1 Introduction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on MovieLens dataset show the superiority of the proposed algorithm over existing graph based recommending algorithms.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 41,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Cityscapes dataset contains around 5000 accurately annotated real world images with pixel-level category labels.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 22,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate that HIM outperforms other state-of-the-art methods by evaluation on the challenging public MSRC image dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 125,
+                            "text": "MSRC image dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, we detail how the first publicly available Twitter corpus",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 72,
+                            "text": "Twitter corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We input the labeled search sessions S which is randomly split into a training and a validation set in line 2.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first one is a subset of the SUN360 dataset , which contains 593 living rooms and bedrooms panoramas.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 47,
+                            "text": "SUN360 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As reported in , dip-means correctly discovers the number of clusters for the subsets of Pendigits, while providing a reasonable underestimation k e near the optimal for the full datasets PD10 tr and PD10 te .",
+                    "annotation_spans": [
+                        {
+                            "start": 188,
+                            "end": 195,
+                            "text": "PD10 tr",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 200,
+                            "end": 207,
+                            "text": "PD10 te",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Large-scale text datasets released as part of LSHTC typically consist of hundreds of thousand training instances which are distributed among tens of thousand target categories.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent papers using the WebQuestions dataset have primarily used the average F1-score",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 44,
+                            "text": "WebQuestions dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From left to right, we display the density maps generated by TEDnet on ShanghaiTech Part A, ShanghaiTech Part B, UCF-QNRF, UCF CC 50, and WorldExpo'10 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 159,
+                            "text": "ShanghaiTech Part A, ShanghaiTech Part B, UCF-QNRF, UCF CC 50, and WorldExpo'10 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "72 of the molecules are shared between two datasets but MUSK2 dataset contain more instances for the shared molecules.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 69,
+                            "text": "MUSK2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The classification results on the UCI datasets and the gene expression datasets are reported in  and , respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 46,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Reviewers can then rely on observstional properties of test collections rather than requiring the blessing of a major venue in order to trust a test collection experiment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Among all methods, DLFZRL tended to perform the best on most data sets, no matter if the setting was traditional or generalized zero-shot learning.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the results on both data sets reveal an interesting trend: the multilayer ar often perform better than their single layer counterparts.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Affordability: Acquiring content, creating representations, constructing queries, generating documentation, and sharing the collection",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data set contains the information of topics, regions and industries for each document and a hierarchical structure for topics and industries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Social-IQ builds upon lessons learned from previous multimodal datasets and includes some key components: 1) unconstrained and unscripted environment:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the ability of our learned strategies to detect objects on the very popular and highly challenging PASCAL VOC 2010 dataset , which contains object instances from 20 classes (e.g. car, sheep, motorbike) annotated by bounding-boxes and viewpoint labels (e.g. left, front).",
+                    "annotation_spans": [
+                        {
+                            "start": 111,
+                            "end": 134,
+                            "text": "PASCAL VOC 2010 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition two other pairwise dependency detection methods were applied on this dataset: dependency detection using the estimated event density (that is a more precise solution to Problem 1, which is not described here due to space limitations) and episode detection method similar to the one described in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The best L2R based strategy depends on the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We now evaluate our segmentation-driven multi-object 6D pose estimation method on the challenging Occluded-LINEMOD  and YCB-Video  datasets, which, unlike LINEMOD , contain 6D pose annotations for each object appearing in all images.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 139,
+                            "text": "Occluded-LINEMOD  and YCB-Video  datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 155,
+                            "end": 162,
+                            "text": "LINEMOD",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Based on this data, we run our TABULARGREEDY algorithm with varying numbers of colors C on the blog data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 108,
+                            "text": "blog data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the experiments we use the gov2.1000 and gov2.30 splits of the TREC GOV2 dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 83,
+                            "text": "TREC GOV2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in practice collections may share many common documents as either exact or near duplicates, potentially leading to high numbers of duplicates in the final results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Single-dataset setting is the most common setting in related works, allowing extensive pre-training using k-fold :",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Without bells and whistles, a simple two-stage Cascade RPN achieves AR 13.4 points improvement compared to RPN baseline on the COCO dataset , surpassing any existing region proposal methods by a large margin.",
+                    "annotation_spans": [
+                        {
+                            "start": 127,
+                            "end": 139,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Instance-based re-sampling generates the training sets by re-sampling from the original dataset with instance replacement.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With that thought in mind, a common technique used by search engines was tested on the newly ambiguous test collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the Visual Dialog v1.0 dataset, the proposed synergistic network boosts the discriminative visual dialog model to achieve a new state-of-the-art of 57.88% normalized discounted cumulative gain.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 33,
+                            "text": "Visual Dialog v1.0 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, for high-dimensional datasets, it is more natural to partition the data by features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\u2208 F. This would multiply by five the size of the input dataset, making the learning process very expensive.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "document lists for the TREC datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 36,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compute the curvature profile at 100 random test samples, and show the average curvature in  for CIFAR-10 and SVHN datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 126,
+                            "text": "CIFAR-10 and SVHN datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We found on most of the data sets the cuts found by the method included all or almost all of V \\ L.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Bill cosponsorship information dating back to the 93rd session of Congress is accessible from the Library of Congress' Thomas database.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 134,
+                            "text": "Library of Congress' Thomas database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In practice, however, datasets are often obtained via ad-hoc collection, or on the basis of proprietary data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this work we introduce the novel COncrete DEfect BRidge IMage dataset (CODEBRIM) for multi-target classification of five commonly appearing concrete defects.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 72,
+                            "text": "COncrete DEfect BRidge IMage dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "far more simultaneously robust to scale of attack perturbation, a variety of different input resolutions, and dataset scale.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results on two different QA datasets, e.g., Answerbag and Jeopardy!",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 57,
+                            "text": "Answerbag",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 62,
+                            "end": 70,
+                            "text": "Jeopardy",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We describe the collection methodology, characteristics of the dataset, and how to obtain it.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate the compatibility of ACNet for non-Euclidean data, we adapt our proposed ACNet to tackle such a vertex classification task on the Cora benchmark , which is a large network of scientific publications connected through citations.",
+                    "annotation_spans": [
+                        {
+                            "start": 145,
+                            "end": 159,
+                            "text": "Cora benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the supplementary material, we also show the critic loss curves for the CIFAR-10 dataset, which provides a good indication of the increased stability and improvement in convergence of POT-GAN.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 91,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When the pool size is large, instances discarded by BCISO have some effect, and the performance on the CISO training set becomes higher than that of BCISO.",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 120,
+                            "text": "CISO training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each sentence in the WSJ corpus, PropBank [7] contains zero or more hand-annotated predicate-argument structures, each of which contains zero or more arguments.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 35,
+                            "text": "WSJ corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to show our method can adapt to individual subjects, we perform personalized gaze estimation on MPIIGaze dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 121,
+                            "text": "MPIIGaze dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the F1 score on the ProceL dataset, where to see the effect of each hyperparameter, we have fixed the values of the other two (these fixed values depend on the task).",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 40,
+                            "text": "ProceL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Cora dataset consists of 2708 scientific publications classified into one of seven classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "Cora dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As we have stated that for both datasets CCA achieves very similar performances in both directions of the retrieval tasks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, in , provided three retinal images of the DRIVE dataset, our approach is compared against DRIU and CRFs.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 62,
+                            "text": "DRIVE dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Roughly speaking, a protocol is sampling resilient if its output on any dataset S can be approximated well by its output on a random subset of half of the players.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a collection S, despite the absence of monotonicity on the anomaly score precludes an upper-bound on \u2126(S )",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test collections were growing from thousands to millions of documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To overcome the limitation of our experimental datasets, we train the model with Visual Genome dataset , which is a large dataset containing very diverse content that could cover most instances appeared in our experimental datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 102,
+                            "text": "Visual Genome dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the evaluation, we used the following three standard multi-class text classification datasets: WebKB, Reuters-21578 and 20 Newsgroups.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 104,
+                            "text": "WebKB",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 106,
+                            "end": 119,
+                            "text": "Reuters-21578",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 124,
+                            "end": 137,
+                            "text": "20 Newsgroups",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We create two views for each dataset as we done for the Spam Email dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 74,
+                            "text": "Spam Email dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose a new deep learning method, namely XML-CNN, which combines the strengths of existing CNN models and goes beyond by taking multi-label co-occurrence pa erns into account in both the optimization objective and the design of the neural network architecture, and scales successfully to the largest XMTC benchmark datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 305,
+                            "end": 328,
+                            "text": "XMTC benchmark datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then for each task and each image of the test set, we omit all detections with detection confidence lower than 0.1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train our networks, we collect a new 3D human motion dataset containing diverse body, hands, and face motions from 40 subjects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have shown that our model outperforms the traditionally used method using two different real world datasets, a citation and a hospital network.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , the data collection procedure consists of two parts: I. User Study and II.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generate the GT perspective maps for datasets UCF CC 50 and ShanghaiTech SHA using our proposed way.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 58,
+                            "text": "UCF CC 50",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 63,
+                            "end": 79,
+                            "text": "ShanghaiTech SHA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For Extended Yale dataset B, we use all 38 classes for evaluation with 50% \u2212 50% train-test split 1 and 70% \u2212 30% train-test split 2.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "Extended Yale dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We take the Enron email collection  hosted by CMU 2 as our email collection; this collection contains messages but no  attachments.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 34,
+                            "text": "Enron email collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After enforcing these requirements, our dataset contains a total of 850 distinct user profiles.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We refer to the collected training and testing sets as CUFED5, which would largely facilitate the research on RefSR and provide a benchmark for fair comparison.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 61,
+                            "text": "CUFED5",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Airline delay dataset The dataset consists of details of all commercial flights in the USA over 20 years.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 21,
+                            "text": "Airline delay dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Scalability: To test scalability, we generate different datasets by subsampling from a large EHR dataset (more details about the data in Section 5.1).",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 104,
+                            "text": "EHR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first randomly split the clickthrough corpus into two subsets, training (99.5%) and validation (0.5%).",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 47,
+                            "text": "clickthrough corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "re-scales the channels in feature maps on the fly and achieves state-of-the-art results on Im-ageNet classification dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 123,
+                            "text": "Im-ageNet classification dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ":t (where we adopt the usual notation \"1 : t\" to denote the collection of integers s such that 1 \u2264 s \u2264 t), an action A t \u2208 A.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the first step, it leverages on the global context obtained from Wikipedia and Web N-Gram corpus to partition tweets into valid segments (phrases) using a dynamic programming algorithm.",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 99,
+                            "text": "Wikipedia and Web N-Gram corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, because many OSNs are public by default and provide APIs to access their content, they have become good candidates for data collection to be used to study problems such as user/ topic modelling, user identification or information leakage.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Along with the dataset, we have designed a suite of new metrics, which include consistency, validity, plausibility, grounding and distribution scores, to complement the standard accuracy measure commonly used in assessing methods' performance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The astro-ph dataset classifies abstracts of papers from the physics ArXiv according to whether they belong in the astro-physics section; CCAT is a classification task taken from the Reuters RCV1 collection; and cov1 is class 1 of the We ran ASDCA with values of m from the set {10 \u22124 n, 10 \u22123 n, 10 \u22122 n}.",
+                    "annotation_spans": [
+                        {
+                            "start": 183,
+                            "end": 206,
+                            "text": "Reuters RCV1 collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "astro-ph dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", we show example TWITTER Dataset:.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 33,
+                            "text": "TWITTER Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We performed extensive empirical evaluation on large-scale versioned document collections using real-world keyword queries with temporal constraints at varying granularities.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Relevant also are animations created from big data sets of images, e.g., personal photo collections of a person where the animation shows a transformation of a face through years , or Internet photos to animate transformation of a location in the world through years , e.g., how flowers grow on Lombard street in San Francisco, or the change of glaciers over a decade.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then report average error over 1000 trials in the 10 label case and 100 trials in the 100 label case for each combination of data set and algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, it is very difficult to obtain the labels (e.g. hashtags) for continuously growing text collections like social media, not to mention the fact that on Twitter application, many hashtags refer to very broad topics (e.g. \"#in-ternet\", \"#sales\") and therefore could even be misleading when used to guide the topic models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The other leading predictors were LBG, P DC and P M I, each used for three out of the four collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We randomly choose 1000 images from the test set, and from these chosen images, we collect 100 query sentences by sampling four random captions from 25 randomly chosen images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is done by re-synthesizing a dataset using the EEG from the Akimpech dataset and sentences from the English Wikipedia dataset from Sutskever et al. .",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 81,
+                            "text": "Akimpech dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 130,
+                            "text": "English Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that in our experiments, statistical significance is measured based on AP and precision values of all the users in the test set, according to Wilcoxon signed rank significance test with p<0.01.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SR collection contains 272 interviews with survivors of the Holocaust, used previously by the Cross Language Evaluation Forum's cross-language speech retrieval",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "SR collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the extremely small datasets, samples should be selected only using IPM.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We believe it important to show results on in-the-wild videos to ensure the generalization beyond this particular dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second dataset, termed SearchOnly dataset, consists of search sessions that did not turn into question sessions.",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 45,
+                            "text": "SearchOnly dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection consists of 20 topics adapted from TREC ad hoc retrieval topics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows some qualitative results of our spatiotemporal model for different sequences from YouTube-VOS validation set.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 114,
+                            "text": "YouTube-VOS validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the counting accuracy, we adopt the mean average error (MAE) and the mean squared error (MSE) metrics, which are defined as: where M is the number of images in the test set, and C gt",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Iteratively, we conduct the feature selection and rule mining until each sequence in the training data set SDB matches",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Botanical text collections contain documents that describe various plant characteristics that are critical to plant species identification.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A TRecgNet is proposed to transfer complementary cues through a label-free modality translation process, which improves modality-specific classification task in an end-to-end manner and achieves state-of-the-art performance on RGB-D indoor scene benchmarks.",
+                    "annotation_spans": [
+                        {
+                            "start": 227,
+                            "end": 256,
+                            "text": "RGB-D indoor scene benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, in the streaming data setting where no estimate on the data complexity exists, it is impractical to perform cross-validation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since there is no ground truth labels of users in this data set, we adopt the following five evaluation measures: 1) Cross-Dimension Network Validation (CDNV ) , which evaluates how well the cluster structure learnt from one or more types of links fits the network of the other type of links.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We investigate the following rewriting methods in our experiments: Synonyms: replace a word with its shortest synonym from Word-Net 3.0 1 , generating the correct full word form based on the CMU English morphology database (e.g. \"vehicle\" \u2192\"car\").",
+                    "annotation_spans": [
+                        {
+                            "start": 191,
+                            "end": 222,
+                            "text": "CMU English morphology database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given the training dataset S, we are interested in minimizing the negative log-likelihood:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ActivityNet Captions  dataset, that we experiment with, has long (over 13 words on average) and diverse descriptions with varied grammatical structures.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 33,
+                            "text": "ActivityNet Captions  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second kind of data are image data, including Columbia object image library ( coil)  and the Japanese Female Facial Expression ( jaffe) Database 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 148,
+                            "text": "Columbia object image library ( coil)  and the Japanese Female Facial Expression ( jaffe) Database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we extract term relations from the document collection automatically.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the performance of our proposed S 2 ConvSCN, we conduct experiments on four benchmark data sets: two face image data sets, the Extended Yale B  and ORL , and two object image data sets,",
+                    "annotation_spans": [
+                        {
+                            "start": 139,
+                            "end": 154,
+                            "text": "Extended Yale B",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 160,
+                            "end": 163,
+                            "text": "ORL",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Except for MNIST, all data sets were taken from the UCI repository.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 16,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 52,
+                            "end": 66,
+                            "text": "UCI repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation on the Labeled Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 33,
+                            "text": "Labeled Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, more than 20% edges on Facebook data set are modified in the k-anonymizaiton method of  when k = 25.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 53,
+                            "text": "Facebook data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the existing datasets present various challenges to some extent, they still have some limitations in the following two aspects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the SDWeb dataset with more than 82 million nodes and 3.6 billion edges, MSEIGS takes only about 2.7 hours on a single-core machine while Matlab's eigs function takes about 4.2 hours and randomized SVD takes more than 6 hours.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "SDWeb dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the case of the DBLP data set, the papers collected between 1996 and 2003 are used for initialization, while we treat all the later publications as new incoming objects till 2008.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 33,
+                            "text": "DBLP data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first evaluation is based on the LETOR datasets , which include manual relevance assessments.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 51,
+                            "text": "LETOR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All the running times are averaged from 10 runs on the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To provide a better understanding of the behavior of the learned metrics, we applied PLML LMNN-MM, CBLML and GLML, on an image dataset containing instances of four different handwritten digits, zero, one, two, and four, from the MNIST dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 229,
+                            "end": 242,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first three datasets came from the large 20Newsgroups dataset , papers  is a subset of the DBLP database linking authors with paper titles, and emap  and hughes  are both microarray datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 65,
+                            "text": "20Newsgroups dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 95,
+                            "end": 108,
+                            "text": "DBLP database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Sample topics discovered in the Auto dataset for the location tag \"fort worth, texas\" and gender \"female\" and \"male\".",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 46,
+                            "text": "Auto dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We study the impact of different splits of the validation set (see Section 5.1).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on UCI datasets show that our method outperforms state-of-the-art imitation learning methods in dynamic feature selection and two static feature selection methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 36,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MSN dataset provides relevance judgment labels ranging from 0 (irrelevant) to 4 (perfectly relevant).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "MSN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, the use of K = 5000 for retrieving on ClueWeb09 has been used by various previous work on the same test collection  and empirically veri ed in .",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 62,
+                            "text": "ClueWeb09",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recently, more visually stimulating and complex datasets have emerged which contain actionable replicas of 3D indoor scenes .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Contrary to previous works, we did not re-train our network on AFLW before evaluating it on that dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 68,
+                            "text": "AFLW ",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both the Genetics and Biochemistry data sets, the initialization set includes all articles in the first 4 years (1996 to 1999), and then we continuously receive new publications from 2000 to 2004.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 48,
+                            "text": "Genetics and Biochemistry data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The X-axis shows the ratio of the number of starting posts versus follow-up posts for users who answered questions in our datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the performance of our GLCN method across different number of convolutional layers on MNIST dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 92,
+                            "end": 105,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform experiments using three different real-world datasets: the Reuters RCV1 corpus , the Higgs detection dataset , and the Forest Cover dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 89,
+                            "text": "Reuters RCV1 corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 119,
+                            "text": "Higgs detection dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 130,
+                            "end": 150,
+                            "text": "Forest Cover dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, multi-label learning with incomplete label assignment has also been studied on small/moderate-size datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is generated in a similar manner to the Weibo dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 65,
+                            "text": "Weibo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compose each testing image with 25 random background images from PASCAL VOC  to form a testing dataset with 1000 images.",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 78,
+                            "text": "PASCAL VOC",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate our approach, we have applied it to two real-world (sensor network temperature  and San Francisco Bay area traffic ) and one artificial (samples from ALARM Bayesian network ) datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 90,
+                            "text": "sensor network temperature",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 126,
+                            "text": "San Francisco Bay area traffic",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To test the sparsity issue, we sample 20,000 users with their ratings from Netflix dataset randomly and sample again to generate four datasets with different sparsity.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 90,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The New York Times corpus was used as the recommendation candidates, and the performance of ELSA was compared with four other methods: Bag-of-Words, Latent Dirichlet Allocation (LDA) , Explicit Semantic Analysis (ESA) , and Probabilistic Explicit Semantic Analysis (PESA) .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "New York Times corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset which follows the guidelines for measuring social intelligence outlined in the previous section (Section 3).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In summary, our key contributions are: \u2022 A data collection technique and dataset of paired lighting reference spheres and background images (200k examples) for training a lighting estimation algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first construct a directed weighted graph G = U, E to model user relations in the data collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each 22050 Hz-sampled monaural song in the data set, we compute the first 13 MFCCs for each half-overlapping short-time (\u223c23 msec) segment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This makes sense, as dialogs are self-referential; in the AVSD dataset, 55.2% of the questions contain co-reference words such as her, they, and it.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 70,
+                            "text": "AVSD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The likelihood distribution, P ({[\u03b1]} | \u03d5), is the probability of observing the trajectories within the data set given the candidate specification.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data set had 1380 fields, with 1231 continuous fields, 124 nominal fields and 25 ordinal fields, respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Ignoring purchase behaviors makes the EC search sessions identical to a web document dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In all aforementioned datasets, questions are asked about a single image.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also note that the ratio between upper and lower quadratic bound loosely corresponds to the condition number of a quadratic function -the ratio between the largest and smallest eigenvalue of the matrix involved in the optimization problem.  : Experiments with simulated delay on the TREC dataset (left) and on a propietary dataset (right).",
+                    "annotation_spans": [
+                        {
+                            "start": 283,
+                            "end": 295,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we adopt reviews on Amazon.com as our corpus to evaluate the AHV system.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset used in our experiments is collected by crawling the Epinions.com site on Jan 2009.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on the ImageNet dataset shows the superiority of interacted bitcount directed by channel-wise interactions.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 35,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Feature selection helped significantly on the mnist-back-image data set, but only slightly on the mnist-backrandom data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 71,
+                            "text": "mnist-back-image data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The IDDTW K-nearest neighbor algorithm used to query the dataset is described in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We run our experiments with all documents in the collection for our four group finding models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second study aims at quantifying the quality of the generated recipes (ingredients and instructions) with respect to (1) the real recipes in the dataset, and (2) the ones obtained with the retrieval baseline .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", T L } denote the L datasets we will be using as testing target datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Timings reported for Krimp include mining and sorting of the candidate collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 27,
+                            "text": "Krimp ",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": IR research paper corpora used in this work.",
+                    "annotation_spans": [
+                        {
+                            "start": 5,
+                            "end": 27,
+                            "text": "research paper corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, beyond providing new metrics, GQA can even directly support the development of more interpretable models, as it provides a sentence-long explanation that corroborates each answer, and further associates each word from both the questions and the responses with a visual pointer to the relevant region in the image, similar in nature to datasets by Zhu et al. ,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", we demonstrate that this filtering approach compares favorably against current methods across a variety of benchmark sequence data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is important to note that the state-of-the-art reputationbased algorithm (YZLM) is not very robust in Amazon and Bookcrossing datasets, and its robustness is even worse than AA.",
+                    "annotation_spans": [
+                        {
+                            "start": 116,
+                            "end": 137,
+                            "text": "Bookcrossing datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The size of the ClueWeb09 [2] corpus, at approximately one billion documents, is an indication of this.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 36,
+                            "text": "ClueWeb09 [2] corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train Grid R-CNN on Pascal VOC dataset for 18 epochs with the learning rate reduced by 10 at 15 and 17 epochs.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 41,
+                            "text": "Pascal VOC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "[this could be checked by computing F max for increasing fractions of the overall dataset size].",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the amount of common subgraphs, we first estimate the significant subgraphs of the target dataset T (1) by its frequent subgraphs in an unsupervsied setting, or (2) by the significant subgraphs mined with a limited number of labels in a supervised setting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We are aware of two previous approaches for positional indexing of versioned collections .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the performance of our prediction algorithm, TraPHic, on the standard datasets and also introduce a new dense, heterogeneous traffic dataset corresponding to urban Asian videos and agent trajectories.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To conduct comparison experiments, we also created a spherical MNIST dataset , spherical SYNTHIA dataset , and spherical Stanford2D3D",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In other words, a run must summarize 10 days of material from the TREC KBA stream corpus  for each topic.",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 89,
+                            "text": "TREC KBA stream corpus ",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The minimum, maximum, and median percentage of 50 SRs in the dataset are also reported.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform additional experiments on Market-1501 dataset  and show results in the supplementary material.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 56,
+                            "text": "Market-1501 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate the scalability of planar Ising models, we designed a simple boundary detection task based on images from the GrabCut Ground Truth image segmentation database .",
+                    "annotation_spans": [
+                        {
+                            "start": 125,
+                            "end": 173,
+                            "text": "GrabCut Ground Truth image segmentation database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our dataset, a user has rated only a very small percentage (about 0.06%).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The main difference with our approach concerns the selection of the sample dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, current VQA datasets (e.g., ) are focused mainly on recognition, and most ques- * Work done during internship at Allen Institute for AI Developed apparently simultaneously by toymakers ... and named after President Theodore \"Teddy\" Roosevelt, the teddy bear became an iconic children's toy, celebrated in story, song, and film.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 29,
+                            "text": "VQA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are many ways for our simulator to generate datasets of unmeasured confounding.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Vagueness, on the other hand, is due to the fact that the user is not able to formulate her information need in a precise way; this leads to an iterative search process (which might even occur with databases for formatted data, like e. g. in product search in a Web",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to compare the variance of LambdaMART and BL-MART models, we use the MSLR-WEB10K data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 98,
+                            "text": "MSLR-WEB10K data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent results on large scale datasets outline the potential of the target distribution loss .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is also shown that these models yield better out-of-distribution detection performance on the CIFAR-10 and CIFAR-100 datasets than Prior Networks trained using forward KL-divergence.",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 128,
+                            "text": "CIFAR-10 and CIFAR-100 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Second, the Enron dataset had fewer records, and the inverted lists were relatively shorter.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 25,
+                            "text": "Enron dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test GPFM on five different benchmark contextual datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Another three were the VISPR-trained method  fine-tuned to each of the following three datasets: VizWiz-Priv, VizWiz-Priv-HoleMean, and VizWiz-Priv-Uncorrupted.",
+                    "annotation_spans": [
+                        {
+                            "start": 97,
+                            "end": 108,
+                            "text": "VizWiz-Priv",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 110,
+                            "end": 130,
+                            "text": "VizWiz-Priv-HoleMean",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 136,
+                            "end": 159,
+                            "text": "VizWiz-Priv-Uncorrupted",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These references are representativ state-of-the-art for deep and shallow architectures on these data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For smaller data sets this effect does not materially affect performance, but as we move to larger data sets like Adult, the method starts to fail catastrophically.",
+                    "annotation_spans": [
+                        {
+                            "start": 114,
+                            "end": 119,
+                            "text": "Adult",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct a comprehensive evaluation of our algorithm on synthetic data and real learning to rank data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "TCC features achieve competitive performance with the other methods on the Penn Action dataset while outperforming them on the Pouring dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 94,
+                            "text": "Penn Action dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 127,
+                            "end": 142,
+                            "text": "Pouring dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(W ) contains a single polytope given by the convex hull of the observed predictors in the dataset:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As dataset we have used the Reuters-21578 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 48,
+                            "text": "Reuters-21578 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the Snippet dataset, GPU-DMM achieves the best topic coherence across all settings, and the improvement over other baseline models are statistical significance at the 0.01 level.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 22,
+                            "text": "Snippet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With these settings, we outperform state-of-the-art results on the WIDER dataset demonstrating the effectiveness of FA-RPN.",
+                    "annotation_spans": [
+                        {
+                            "start": 67,
+                            "end": 80,
+                            "text": "WIDER dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection of inputs having the same face allocation, defined as \u2200r \u2208 {1, . . . , R} , \u03c9 r = {x \u2208 X : r k (x) = r}, constitutes the r th partition cell of the unit k PD (recall (2) and Lemma 1).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also tested subsets of Yale Face dataset with different numbers of samples or features ranging from 100 to 1000 to report the average run time of GLNP in .",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 43,
+                            "text": "Yale Face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation collections for this task have been created by crowdsourcing relevance judgements .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 22,
+                            "text": "Evaluation collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the evaluation, we follow  by training on one dataset and evaluating the confidence on other out of distribution datasets and noise images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used this method on the Family Tree Problem using a train, test and validation sets built in the following way.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Training and testing datasets are independent so that term distribution over these two sets might be different, and it is possible that, discriminative terms generated in the training set might not even exist in the testing set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "on RCV1 dataset are improved by 11.1% and 8.8% respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 15,
+                            "text": "RCV1 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", v n , corresponding to the n cameras, and e ij \u2208 E ifF ij belongs to the collection of the estimated fundamental matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In a practical test setting,  : Best-10 CIDEr and SPICE accuracy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As in the siamcompetition2007 dataset, the observed differences are statistically significant in most cases.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 37,
+                            "text": "siamcompetition2007 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Instead of using positive and negative sentiment words as seeds, we start with comments that are manually annotated as Good or Bad (from SemEval-2016 Task 3 datasets ).",
+                    "annotation_spans": [
+                        {
+                            "start": 137,
+                            "end": 165,
+                            "text": "SemEval-2016 Task 3 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Like the KITTI benchmark, we compute the average precision (AP) at a 0.7 IoU for vehicles and a 0.5 IoU for bikes and pedestrians.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 24,
+                            "text": "KITTI benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "So far we have been concentrating on reshaping multiple annotated datasets (for training classifiers) into domains for adapting to test datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the experiments with the automatic content extraction (ACE) corpus and a Korean news corpus, the proposed model outperformed the previous systems based on SVM tree kernels even though it used more shallow linguistic knowledge.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 94,
+                            "text": "Korean news corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results on two different QA datasets: Answerbag and Jeopardy!",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 51,
+                            "text": "Answerbag",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 56,
+                            "end": 65,
+                            "text": "Jeopardy!",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The mAP(%) difference between target and translated features on three public datasets: Holidays (Green), Oxford5k (Blue) and Paris6k (Brown) in the first, second and third blocks, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 95,
+                            "text": "Holidays",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 113,
+                            "text": "Oxford5k",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 125,
+                            "end": 132,
+                            "text": "Paris6k",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show the top 30 closest matching images from the LabelMe database based on the L1-norm distance, which is robust to outliers.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 68,
+                            "text": "LabelMe database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One common feature of FCIM algorithms is that they need to exploit a global knowledge on the dataset at any time of the computation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Once we have learned a calibration function for each representation, we convert the vector of scores for a test set song to a vector of approximate posterior probabilities.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also make the second group of datasets by ourselves for various training and test purposes and also for opening to public afterwards.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we use 10,000 randomly selected SIFT descriptors to learn KPCA-based local feature maps, which takes about 12 hours for the training and testing sets on the full Scene-15 dataset, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 175,
+                            "end": 191,
+                            "text": "Scene-15 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It should also be noted that when mining datasets of elicited responses such as user ratings, the values themselves are notoriously unreliable (users show poor repeatability, with ratings wandering up to 40% of the scale from day to day 3 ), so a good low-rank approximation of the data has higher probability of generalization than a medium-rank model that perfectly reconstructs the data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There is some amount of recent work on indexing and searching in versioned document collections that can be split into three subsets, as follows.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This modification yields an architecture that offers state of the art performance onvarious publicly available datasets improving results by 10-20%.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the toy dataset, we randomly sampled an additional 50,000 users, and for each of the users compiled a list of movies the user has rated and then discarded the actual ratings.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 19,
+                            "text": "toy dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show results of the lift curves for various models on this data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Smaller more focused collections, such as a small digital library, might have queries submitted that, according to Wikipedia or WordNet, are ambiguous, but only one interpretation of the query is actually present in the collection, meaning the ambiguity can be ignored.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that a compact (16-dimensional) expression embedding space can be learned by training a deep network with the proposed FEC dataset using triplet loss .",
+                    "annotation_spans": [
+                        {
+                            "start": 127,
+                            "end": 138,
+                            "text": "FEC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our isometric feature mapping procedure, or isomap, is able to reliably recover low-dimensional nonlinear structure in realistic perceptual data sets, such as a manifold of face images, where conventional global mapping methods find only local minima.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Ohsumed data set is available from the LETOR website 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "Ohsumed data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For NMF, FPMC and our HRM 6 methods, we run several times with random initialization by setting the dimensionality d \u2208 {50, 100, 150, 200} on Ta-Feng and BeiRen datasets, and d \u2208 {10, 15, 20, 25} on T-Mall dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 142,
+                            "end": 169,
+                            "text": "Ta-Feng and BeiRen datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 199,
+                            "end": 213,
+                            "text": "T-Mall dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train the network using a synthesized dataset of images with visual motifs (see ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly, in our experiments, we use a subset of these datasets (views) as source domains for training classifiers and the rest of the datasets (views) as target domains for testing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "data set since people leave rich opinions on them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "describes the Flickr data set used in the experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 29,
+                            "text": "Flickr data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We split the dataset into 70% train, 10% validation, 10% test and 10% challenge, making sure that all the questions about a given image appear in the same split.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For large data sets, stochastic estimates of parameter gradients can be obtained from a mini-batch of data examples .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, the genre database does not have the record for \"Monster a-Go",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all datasets, the  performance is averaged over the 5 different folds.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since CosFace employs a private training data, we retrain CosFace on our MS1MV2 dataset with ResNet100.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 87,
+                            "text": "MS1MV2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The FLIC dataset contains many frames with more than a single person, while the joint locations from only one person in the scene are labeled.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "FLIC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With this maximal IFS, we also run the clustering-based approach on this IMDB dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 85,
+                            "text": "IMDB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Though LMART and RM get relatively high NDCG@5 on the training dataset, they are outperformed by the runner-up method MART due to over-itting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our full algorithm (C) overcomes this limitation by jointly learning from both the AMOS dataset and the TLVDB dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 95,
+                            "text": "AMOS dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 104,
+                            "end": 117,
+                            "text": "TLVDB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These errors are shown in  along with the running time for ASES dataset in the bottom right subfigure.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 71,
+                            "text": "ASES dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The importance of this model cannot be understated -it has been chosen to create graphs for the Graph500 supercomputer benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 128,
+                            "text": "Graph500 supercomputer benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, SR # substantially improves the coverage of JSR/LSR (\u223c0.73) and SR ++ /RS (\u223c0.81) to \u223c0.95.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our perturbations have simple and shift-invariant patterns, yet achieved high fool ratio on various pairs of architectures and datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The feasibility of the proposed method is demonstrated through classification, detection, and semantic segmentation tasks with synthetic and real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, apply independent random permutations to components 2 through p of the p-component attribute vector associated with each object to obtain a single randomized dataset D r .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "24 humans in the KG and database fields, who have experience in similar gold standard construction, were invited and each task was scored by three judges.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the meantime, we offer a new dataset for reflection removal that provides the two DP sub-aperture views.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These four competitions are the accuracy competitions with the largest number of submissions and serve as representative examples for a typical accuracy competition in the MetaKaggle dataset (see  for information about these competitions).",
+                    "annotation_spans": [
+                        {
+                            "start": 172,
+                            "end": 190,
+                            "text": "MetaKaggle dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To encode the grammatical function of each token, partof-speech tagging was done for each token within a sentence using the Apache OpenNLP 10 implementation of a max-ent POS tagger, using the Penn English Treebank POS tag dictionary 11 that comprises of 36 tags.",
+                    "annotation_spans": [
+                        {
+                            "start": 192,
+                            "end": 232,
+                            "text": "Penn English Treebank POS tag dictionary",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two test collections used were the TREC AP89 collection (TIPSTER disk 1) and the FSupp Collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 101,
+                            "text": "FSupp Collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 39,
+                            "end": 76,
+                            "text": "TREC AP89 collection (TIPSTER disk 1)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct an additional simulation experiment on the KAIST dataset with consideration of imaging noise.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 67,
+                            "text": "KAIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate the versatility of our algorithm, we also conducted a multi-class classification experiment using the entire MNIST data set (all ten digits, so 60000 training data and 10000 testing data).",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 138,
+                            "text": "MNIST data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have illustrated the results for 100and 200-dimensional data sets with u = 6.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We removed the stop words and only retained nouns, verbs, and adjectives in DailyDialog  and Sina Weibo Conversation Corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 123,
+                            "text": "DailyDialog  and Sina Weibo Conversation Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MS-COCO , the SUN database , DeepFashion , MINC , and Places  are all examples where an order of magnitude separates the number of images in the most versus the least common classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 7,
+                            "text": "MS-COCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 10,
+                            "end": 26,
+                            "text": "the SUN database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 29,
+                            "end": 40,
+                            "text": "DeepFashion",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 43,
+                            "end": 47,
+                            "text": "MINC",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 54,
+                            "end": 60,
+                            "text": "Places",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An independent held-out validation set with size n \u2032 is denoted similarly as .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After running for 2 days on the data set, Automatic Frankensteining has converged and does not show any further improvement.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\"Category B\" ClueWeb09 dataset  which includes roughly 50 million English pages crawled from the Web.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 30,
+                            "text": "ClueWeb09 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The backbone parameter are initialized by image classification task on ImageNet dataset , other new parameters are initialized by He (MSRA) initialization .",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 87,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": The outgoing weights of the hidden units for a network containing 4 VQs with 6 units in each, trained on the spline dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 111,
+                            "end": 125,
+                            "text": "spline dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We see Slim is still faster than Krimp for 9 datasets, including huge improvements for Chess (k-k), DNA amplification, Ionosphere and Mushroom.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 98,
+                            "text": "Chess (k-k)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 100,
+                            "end": 117,
+                            "text": "DNA amplification",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 134,
+                            "end": 142,
+                            "text": "Mushroom",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 129,
+                            "text": "Ionosphere",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The maxNDCG value achieved using the baseline PageRank corpus selection method to select 1.5 billion documents is 0.6597.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 61,
+                            "text": "PageRank corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method achieves the best-published result on the Office-31 benchmark  and competitive performance compared to the state-of-the-art on the challenging VisDA-2017 benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 72,
+                            "text": "Office-31 benchmark",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 154,
+                            "end": 174,
+                            "text": "VisDA-2017 benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the discovered configurations (solid green and yellow boxes) and foreground estimates (dashed magenta boxes) that have high degree in graph G P for all 20 classes in the PASCAL dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 176,
+                            "end": 190,
+                            "text": "PASCAL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Twitter dataset  contains user ids and links between users, directed from the user to his/her followers.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "Twitter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unfortunately we couldn't explore this deeper because the dataset is anonymized and actual urls are not available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We further evaluate progressive networks on another dataset  which includes 12,600 rainy images for training and 1,400 rainy images for testing (Rain1400).",
+                    "annotation_spans": [
+                        {
+                            "start": 145,
+                            "end": 153,
+                            "text": "Rain1400",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "More challenging is the task of discriminating 4s from non-4s in the USPS digit database: the data are 256-dimensional, and there are 7291 training and 2007 test points.",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 88,
+                            "text": "USPS digit database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All the datasets used the bag-of-words representation with word-level features, and were pre-processed using stop-word removal, TFIDF weighting (for vMF only, since LDA and DCM can handle only counts), and removal of very high-frequency and low-frequency words.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Notice that the number of training samples in the TPS dataset is also much smaller than that in the Last.fm-1K dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 61,
+                            "text": "TPS dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 100,
+                            "end": 118,
+                            "text": "Last.fm-1K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The average sparse ratios in FL, which is the ratio of the number of near-zero entries to the number of entries in W, over different data sets in ZRL and GZRL are 0.49 and 0.54, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 146,
+                            "end": 149,
+                            "text": "ZRL",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 154,
+                            "end": 158,
+                            "text": "GZRL",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Fine-grained and binary root-level prediction performance for the Stanford Sentiment Treebank task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments with TREC Ad-hoc and Legal Track datasets demonstrate that with high quality manual expansion, this diagnostic approach can reduce user effort by 33%, and produce simple and effective structured queries that surpass their bag of word counterparts.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 53,
+                            "text": "TREC Ad-hoc and Legal Track datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We performed experiments on the ClueWeb09 collection, a best-first web crawl from early 2009.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 52,
+                            "text": "ClueWeb09 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that in a concurrent paper  the same scheme has been proposed, where they use as p out existing large image datasets, whereas we favor an agnostic approach where p out models a certain \"noise\" distribution on images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "PCA-based duplicate removal is conducted within each scene category in the Places database and across the same scene category in the SUN database, which ensures that Places and the SUN do not contain the same images, allowing us to combine the two datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 90,
+                            "text": "Places database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 133,
+                            "end": 145,
+                            "text": "SUN database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While, in the past, query assistance tools would analyze the document corpus in order to identify phrases that could serve as alternate queries, the tremendous growth in Web search engine traffic allowed these tools to mostly rely on real user-issued queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This collection contains the 3GB AQUAINT corpus of newswire text in English, 50 topics and a set of relevance judgments.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 47,
+                            "text": "3GB AQUAINT corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this evaluation, we consider only competitors trained without any supervision from ground truth labels (e.g., synthetic datasets ) involved in any phase of the training process .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The small NORB normalized-uniform dataset contains 24,300 training examples and 24,300 test examples.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 41,
+                            "text": "NORB normalized-uniform dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "plots the confusion matrices of two clustering solutions for the la1 data set,",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 77,
+                            "text": "la1 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We found that the original dataset is contaminated with conflicted ratings where a tuple of (user, item, context)) corresponds to two outputs (ratings) that differ by at least 2.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , STA outperforms previous open set methods on Digits dataset with different evaluation metrics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To the best of our knowledge, there is no competitive generative models to classify digits on SVHN dataset with full labels.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 106,
+                            "text": "SVHN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Latent Dirichlet Allocation (LDA) model  is an popular technique to identify latent topic information from a large document collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is worth mentioning that in some previous works they usually reported mAP values for the whole data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, as the CNN is very sensitive to the training dataset, several methods  considered using the refined dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the CIFAR-10 dataset, we observe that in many cases of animal classes, SparseFool tends to perturb some universal features around the area of the head (i.e., eyes, ears, nose, mouth etc.), as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, the bottom-up clustering scheduler merges each element (e.g., a document in a collection) with its closest neighboring cluster.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore this thresholding method gives each distinct binary code roughly equal probability of occurring in the document collection, thus achieves the best utilisation of the hash table.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Oxford Robotcar dataset, we add a connection between two images to E if the distance between the images is smaller than a threshold of 12m.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 31,
+                            "text": "Oxford Robotcar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To test our reparameterization method for the application of supervised classification, we consider the CIFAR-10 data set of natural images .",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 121,
+                            "text": "CIFAR-10 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence, we first compare on some small, wellknown UCI benchmark datasets: Anneal, Breast, Heart, Iris, Led7, Nursery, Page blocks, Pima, Tic-tac-toe and Wine.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 79,
+                            "text": "Anneal",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 81,
+                            "end": 87,
+                            "text": "Breast",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 94,
+                            "text": "Heart",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 100,
+                            "text": "Iris",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 102,
+                            "end": 106,
+                            "text": "Led7",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 108,
+                            "end": 115,
+                            "text": "Nursery",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 117,
+                            "end": 128,
+                            "text": "Page blocks",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 130,
+                            "end": 134,
+                            "text": "Pima",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 136,
+                            "end": 147,
+                            "text": "Tic-tac-toe",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 152,
+                            "end": 156,
+                            "text": "Wine",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 49,
+                            "end": 71,
+                            "text": "UCI benchmark datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown by previous TREC experimentation, BM25 usually provides very effective retrieval performance on the TREC collections that are used in  BM25.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 125,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The prior term F B p is used to restrict our output to a feasible human pose distribution (especially for rotation around bones), defined as: where A B \u03b8 and \u00b5 B \u03b8 are pose prior learned from CMU Mocap dataset , and w B p is a balancing weight.",
+                    "annotation_spans": [
+                        {
+                            "start": 192,
+                            "end": 209,
+                            "text": "CMU Mocap dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we investigate the capacity of PR to distinguish malignant from benign skin lesions using a dataset of desorption electrospray ionization mass spectrometry imaging (DESI-MSI) of a common skin cancer, basal cell carcinoma (BCC)  (details in supplement).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on both TREC 2013 and 2014 Session track datasets demonstrate that SQLM does perform better than classical QLM for multi-query retrieval systems, and also show its potential of being further improved for session search.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 71,
+                            "text": "TREC 2013 and 2014 Session track datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our evaluation starts with a series of experiments with the validation set V to find the best parameters of each method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To validate the generalization ability, we also evaluate our proposed approach on the YorkUrban Line Segment Dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 116,
+                            "text": "YorkUrban Line Segment Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our simulator has many advantages over real datasets in evaluating causal discovery algorithms in the presence of these challenges.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The RAND method is better than unsupervised learning method on HTTP, Covtype, Mammography and Ann-thyroid datasets, when performance of Isolation Forest is not good.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 114,
+                            "text": "HTTP, Covtype, Mammography and Ann-thyroid datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our retrieval scenario is vertical search , in which content of a CQA sub-collection should be retrieved on top of general Web search.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the Cityscapes dataset, our method is the first one which runs in real time while maintaining a high accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 25,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This kind of neural network technology advanced the state-of-the-art of large-vocabulary CSR, which employs Hidden Marlcov Models (HMM), for the ARPA 1oo0-word Resource Management corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 145,
+                            "end": 186,
+                            "text": "ARPA 1oo0-word Resource Management corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Effectiveness: We show that PICS discovers quality clusters, bridges and outliers in diverse real-world datasets including YouTube and Twitter.",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 142,
+                            "text": "YouTube and Twitter",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The EaaS model enabled the MB2013 corpus to be much larger, about 243 million tweets .",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 40,
+                            "text": "MB2013 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset comprises about 40,000 annotated images and for each task between 1,100 and 9,900 objects have been marked by the annotators, where the number of different object categories varies between 6 and 30 for the different tasks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, for various types of features, i.e. the SPIN, SIFT, GLOH and PCA-SIFT features, and for a large variety of images from the COREL image collection, we have demonstrated that the similarity distances from one to other features, computed from L p norms, are Weibull-distributed.",
+                    "annotation_spans": [
+                        {
+                            "start": 131,
+                            "end": 153,
+                            "text": "COREL image collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the data sets represent the diverse areas in which data miners apply their algorithms including finance, medicine, manufacturing and science.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 4, we then evaluate the approaches on the 2009 US Congressional Legislation dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 94,
+                            "text": "2009 US Congressional Legislation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To address these shortcomings, while retaining the visual and semantic richness of real-world images, we introduce GQA, a new dataset for visual reasoning and compositional question answering.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The digit classifier achieved a baseline error rate of 1.09% when tested on the uncorrupted MNIST test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 92,
+                            "end": 106,
+                            "text": "MNIST test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset resulted in 9 iterations of the outer-loop of the apriori association mining algorithm.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, Metzler and Croft  show that a sequential dependence model, which uses all ordered adjacent pairs of query terms, is highly effective for retrieval on TREC corpora.",
+                    "annotation_spans": [
+                        {
+                            "start": 165,
+                            "end": 177,
+                            "text": "TREC corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At first, they forward all images in the dataset into a pre-trained deep model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this study, we use the standard Boston Housing data, which is the housing data for 506 census tracts of Boston from the 1970 census, available from the UCI Machine Learning Database Repository:",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 54,
+                            "text": "Boston Housing data",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 155,
+                            "end": 195,
+                            "text": "UCI Machine Learning Database Repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also generated a number of synthetic data sets by constructing gaussian clusters with means which are randomly drawn from the unit cube.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A number of test collections have already provided much support for corresponding research tasks, such as WT10g , VLC2 , Gov2 , ClueWeb09, and ClueWeb12 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ESPNet-C. The loss function is almost the same except that there is no cross-entropy loss over the unlabeled dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform comparisons on the TC128  dataset, which consists of 128 challenging color sequences.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 44,
+                            "text": "TC128  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and  show the classification results for all the test methods with topic number K varying from 20 to 100 on the two data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There has also been some amount of work on how to exploit these redundancies to better compress full-text indexes for versioned document collections .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we evaluate our late fusion CNN on two testing datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In all cases the SVM was trained and tested using the same training/test set sizes as described in  (25 documents per newsgroup for training and 475 for testing; the number of unlabeled documents fed to IDC was 500 per newsgroup).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on realworld datasets demonstrate the effectiveness of the proposed framework.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our test collection is derived from transaction logs from an Android application developed for the Long Night of Music 2012 (LNMusic) and the Long Night of Munich Museums 2012 (LNMuseum).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "During training, once a seed mesh v S is predicted, the corresponding camera and shape parameters h S are changed to generate new meshes {v N j }: The shape parameter s is sampled uniformly from the interval covering three times the standard deviation per Once the foreground hand region is rendered via f TRen , it is placed on random backgrounds obtained from the NYU depth database .",
+                    "annotation_spans": [
+                        {
+                            "start": 366,
+                            "end": 384,
+                            "text": "NYU depth database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "we introduce a large-scale RGB-D crowd counting dataset named ShanghaiTechRGBD for performance evaluation, and such a dataset would accelerate the study of detection-based approaches for crowd counting; iv)",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 78,
+                            "text": "ShanghaiTechRGBD",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we illustrate this by running the accelerated lazy algorithms for a large synthetic dataset with N = 15000 and d = 400, containing more than 10 8 comparison pairs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments for Computing Positions and Snippets from the Compressed Document Collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Corrected-Moment method  can also be considered as a learning-based method as it needs to train a corrected matrix for each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The  : Normalized cumulated reward for different levels of graph noise (expected fraction of perturbed edges) and payoff noise (largest absolute value of noise term ) on the 4Cliques dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 174,
+                            "end": 190,
+                            "text": "4Cliques dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A data set with d dimensions, N records was denoted by Syn(d)D(N).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Reuters Data: This is the Reuters Corpus, Volume 1.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 40,
+                            "text": "Reuters Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The probability thresholds of using an object as the guiding object are chosen to maximize the F1 score on the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, we should reshape test datasets, conditioning on the identified domains from the training datasets -the goal is to discover latent domains in the test datasets that match the domains in the training datasets as much as possible.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To show that npTOT is able to capture the temporal variation in real documents, we performed experiments on three datasets:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For French, we trained a 5-gram LM from the monolingual dataset provided for WMT-12.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We experiment with a number of different implementations of the nearest-neighbor search defense strategy by varying: (1) the image database that is queried by the defense and (2) the features that are used as basis for the nearest-neighbor search.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both are defined in terms of modeling a single data set and have no principled means to relate to results from other sources in the process.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This happens on the HTTP dataset and the SMTP dataset, both of which have low outlier rates.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 32,
+                            "text": "HTTP dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 41,
+                            "end": 53,
+                            "text": "SMTP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Their approach was exclusively evaluated and optimized on the ACE2004, MSNBC and AQUAINT data sets on which the authors achieve state-of-the-art results.",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 98,
+                            "text": "ACE2004, MSNBC and AQUAINT data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can notice from  that the network actually improves the original performance on the source dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Bayesian nonparametric models provide flexible and accurate priors by allowing the dimensionality of the parameter space to scale with dataset sizes .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Algorithm 1 Heterogeneous Datasets Representation Out-of-sample Extension.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, methods that work well rely on the assumption that the training set and the test set are drawn from the same feature space and the same distribution, which does not always happen in realistic settings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The datasets were Animals with Attribute (AwA), CUB-200-2011 (CUB), SUN with Attribute (SUN) and Attribute Pascal and Yahoo (aPY).",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 40,
+                            "text": "Animals with Attribute",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 48,
+                            "end": 60,
+                            "text": "CUB-200-2011",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 68,
+                            "end": 86,
+                            "text": "SUN with Attribute",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 97,
+                            "end": 123,
+                            "text": "Attribute Pascal and Yahoo",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We achieve the best overall performance on four commonly-used benchmark datasets, largely surpassing the state-of-the-art methods by up to 14% for the MAE metric.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, the interval for HTTP dataset is set as [0.25%, 1%], and it is set as [0.5%, 2%] for SMTP dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 42,
+                            "text": "HTTP dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 98,
+                            "end": 110,
+                            "text": "SMTP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We obtained the second two datasets from the INRIA BI-GANN dataset which consists of 1 billion of 128-dimensional SIFT descriptors  extracted from approximately 1 million images  2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 66,
+                            "text": "INRIA BI-GANN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each test dataset contains 100 unseen images and unseen words.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the second data set, known as convex, each image contains a white region, and the task is to determine whether the white region is convex; examples are shown test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the Appazaar dataset is not as large as these benchmark datasets, it is still much larger than datasets that have been used in previous context-aware recommendation work .",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 29,
+                            "text": "Appazaar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We do not have users without history in our dataset, but even if we have just one session we can still learn something from it and improve the results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate quantitatively our methodology by rendering 30 distinct head scans from our test set in frontal and side poses varying from 20 to \u221220 degrees around the yaxis in order for the ears to be visible in the image space.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiments we do not use the polynomial kernel on the entire dataset, but rather use it to extract features (i.e., do principal component regularization) using only a subset of the examples (only 5,000 examples out of 60,000).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "W S-matrix was constructed using the documents in the Wikipedia collection (en.wikipedia.org/wiki/Wikipedia: Databasedownload) with 930,000 documents written by more than 89,000 authors on various topics and writing styles.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 74,
+                            "text": "Wikipedia collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this set of experiments, we consider four TREC collections, including two newswire collections (AP and Robust) and two large-scale web collections (GOV2 and ClueWeb09",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 61,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 99,
+                            "end": 101,
+                            "text": "AP",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 106,
+                            "end": 112,
+                            "text": "Robust",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 151,
+                            "end": 155,
+                            "text": "GOV2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 160,
+                            "end": 169,
+                            "text": "ClueWeb09",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This test set was randomly chosen within demographic constrains to ensure ethnic, age and gender diversity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a reference, we create a simulated dataset whose sample size is 7042 without missing values.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments use a diverse set of datasets from urban networks and computer networks to demonstrate the practical utility of our setting.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare CRTER's results with PLM on the datasets used in , including AP88-89, WT2G and TREC 8.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A list of ingredients is a variable sized, ordered collection of unique meal constituents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first choose variables which the selection depends on, and then remove or maintain records based on the values of the chosen variables in the simulated dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Differently from the horizontal partitioning technique, the set of itemsets mined from each projection produce a proper partitioning of the global collection of frequent itemsets with complete support information.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we experimented with TREC Novelty collections and found out that weighting with BM25F can improve on best published results for high precision metrics.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 58,
+                            "text": "TREC Novelty collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, an ensemble of residual nets  achieves 3.57% top-5 error on ImageNet test set, which is even lower than 5.1% of the reported human-level performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 90,
+                            "text": "ImageNet test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although these Siamese frameworks are pre-trained on large video datasets, the pair-wise training sample only tells whether the two samples belong to the same target or not without category information.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "illustrates the overall statistics of four datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the perplexity benchmark we used the query dataset, publicly available at h p://www.statmt.org/",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 54,
+                            "text": "query dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since we do not have access to full test set and code of some methods, values for metrics not presented in the respective papers are missing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, it is quite expensive to collect such paired multi-style captioning collections, especially when the numbers of images and styles increase.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The percentage of responders in the entire data set was about 7.5%.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we investigate the document clustering task on a standard dataset Cora with the citation information available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we mainly evaluate our Meta-Cleaner on two popular benchmarks for noisy-labeled visual recognition, i.e., Food-101N  and Clothing1M .",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 130,
+                            "text": "Food-101N",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 136,
+                            "end": 146,
+                            "text": "Clothing1M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Even when the original data is represented as feature vectors, transforming the data into a graph (for example using a Gaussian kernel to compute weights between points) can be convenient for exploiting properties of a data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that unlike the case of the real data sets, we know the exact relationship between the data features and the cluster label.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focused our experiments on 124,878 reviews associated with Amazon products from the Multi-Domain Sentiment Dataset 6 .",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 119,
+                            "text": "Multi-Domain Sentiment Dataset 6",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For non-CMU datasets, we used the OpenPose-    : Quantitative comparison of the three models operating on different datasets (see text for discussion).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A modified version of this approach adapted to binary ratings recently placed second in the Million Song Dataset Challenge  ran by Kaggle.",
+                    "annotation_spans": [
+                        {
+                            "start": 92,
+                            "end": 112,
+                            "text": "Million Song Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As comparison labels are numerous (quadratic in the dataset size) and their acquisition is time-consuming, the experimenter collects only a subset of all possible comparison labels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The quality-biased ranking method  is commonly referred to as the state of the art in web document ranking with TREC collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 128,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unlike PointNet,  : Reduction in classification accuracy for different levels of both random and iterative saliency occlusion for all tested datasets (ModelNet10, ModelNet40 and KITTI).",
+                    "annotation_spans": [
+                        {
+                            "start": 151,
+                            "end": 161,
+                            "text": "ModelNet10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 163,
+                            "end": 173,
+                            "text": "ModelNet40",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 178,
+                            "end": 183,
+                            "text": "KITTI",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The network's goal is then to learn to map any sketch onto one of these K (random) dictionary elements so that the representations of the full sketch dataset spread out over the whole embedding space.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example,  shows the updated database D after modification of sequence 0 in the example database D. CISpan will model this update to a removal of sequence 0 in D and an insertion of sequence 0 in D .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Performance on the Wikipedia Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 36,
+                            "text": "Wikipedia Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To assess the effect of parameter T on our 3G-Net, we employ ResNet-50 as a backbone model and conduct experiments on ImageNet-1K dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 118,
+                            "end": 137,
+                            "text": "ImageNet-1K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To apply logistic regression, we use three binary classification datasets:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The negative correlation in the informational setting of NP2003 dataset is due to a heavily skewed distribution of candidate rankers when the production ranker is at a  local minimum of the ranker space and almost all candidate rankers are better for the queries in which the production ranker has not presented any relevant documents.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 71,
+                            "text": "NP2003 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Cross-modal hashing research has received increased interest over the past several years due to the recent emergence of large freely available cross-modal datasets from sources such as Flickr.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To validate this hypothesis, we design an experiment on CIFAR-100 dataset with five incremental batches (each has 20 classes).",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 73,
+                            "text": "CIFAR-100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The labelled corpus was generated automatically from the phrase-structure trees in the Wall Street Journal portion of the Penn Treebank-III .",
+                    "annotation_spans": [
+                        {
+                            "start": 122,
+                            "end": 139,
+                            "text": "Penn Treebank-III",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although Neill's scan statistic is faster on Dataset1, it can't characterize the true clusters as accurately as GridScan can.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The document corpus is category A of the English Clue-Web12 collection which contains about 733 million documents.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 70,
+                            "text": "English Clue-Web12 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Without attention module, our framework could only capture the global appearance and motion cues, and fails to model the relations between the camera wearer and the interactor, which leads to 9.0% and 12.3% accuracy degradation on PEV and NUS(first h-h) dataset, demonstrating the importance of attention module.",
+                    "annotation_spans": [
+                        {
+                            "start": 231,
+                            "end": 261,
+                            "text": "PEV and NUS(first h-h) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, VQA v2  has been introduced to weaken language priors in the VQA v1 dataset  by identifying complementary images.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 89,
+                            "text": "VQA v1 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, the two datasets allow us to investigate the differences between sessions in which users posted a question following attempted searches, mainly due to search failure or searcher frustration, and sessions in which users that have experience of asking questions on Yahoo!",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As noted by a few research studies, such as , obtaining such a dataset by human judgment is very costly because all the activities of a user need to be examined in order to assess her expertise.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Cross-View assigns data according to camera views, where training and test set have 37, 920 and 18, 960 samples, respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For VOC data sets, a different procedure, based on the reports of VOC challenges , is used to construct multiple visual dictionaries, and each dictionary results in a different kernel.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "VOC data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this subsection, we show how our approach can be used to detect the Dust Bowl drought from the CRU precipitation dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 123,
+                            "text": "CRU precipitation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 5, we present experimental results obtained with our approach on a subcollection of the Reuters RCV1/RCV2 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 123,
+                            "text": "Reuters RCV1/RCV2 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ICVL dataset consists of 201 images, which is by far the most comprehensive natural hyperspectral dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "ICVL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both MROTS-I and MROTS-II perform significantly better than the other baselines on the first Paper dataset (9 features and 32 outputs per sample).",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 106,
+                            "text": "Paper dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The document collection used in the experiments reported here is the one contained in TREC Disk 4 and 5, excluding the Congressional Record sub-collection, together with the 50 topics used for the TREC 8 Ad Hoc track.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 103,
+                            "text": "TREC Disk 4 and 5",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 154,
+                            "text": "Congressional Record sub-collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 197,
+                            "end": 216,
+                            "text": "TREC 8 Ad Hoc track",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the TREC-8 data set, we randomly select the first query.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 25,
+                            "text": "TREC-8 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We validate our method on the KITTI benchmark where we set new state-of-the-art results among published monocular methods, including the harder pedestrian and cyclist classes, while maintaining efficient run-time.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 45,
+                            "text": "KITTI benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A major line of research on extending SVMs to handle partially labeled datasets is based on the following idea: solve the standard SVM problem while treating the unknown labels as additional optimization variables.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The backward algorithm was superior to the static baseline for the case of the DBLP data set throughout the entire range of the X-axis.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 92,
+                            "text": "DBLP data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The 2012 Medical Records track collection is similar to the 2011 collection in that it uses the same document set and similar topics, but it contains a better set of relevance judgments to use as ground truth.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 41,
+                            "text": "2012 Medical Records track collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 60,
+                            "end": 75,
+                            "text": "2011 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It can be seen that printing too many layers may result in a decrease in randomness, and on the contrary, printing too few causes insufficient information collection capability.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Another disadvantage of this algorithm is its time complexity of O(n 2 ) for a data set of n elements (see  for details).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In further experiments that we have run, we have split the Reuters-21578 test set even further, i.e., into 100 equallysized parts of about 33 documents each, so as to test the performance of Laplace smoothing methods in even more challenging conditions.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 81,
+                            "text": "Reuters-21578 test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, versioned document collections also have very significant redundancies between the different versions, which could be exploited by suitable compression techniques.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that we only use one redirection range of pitch for the ColumbiaGaze dataset since the gaze groundtruth in this dataset is discrete and there are only three values for the pitch angle,",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 81,
+                            "text": "ColumbiaGaze dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We analyzed neural data collected on the Neural Representation Benchmark (NRB) dataset, which was originally developed to compare monkey neural and human behavioral responses .",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 86,
+                            "text": "Neural Representation Benchmark (NRB) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the clustering results on the MixD data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 49,
+                            "text": "MixD data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(3) We empirically remove queries (from all the four datasets) by \"users\" who issued more than 1000 queries in total in the training, validation, and test datasets, because such \"users\" are more likely to be robots.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each row shows the template of a subject selected from the static BU-3DFE face database  (left), and three randomly selected animation frames, driven by the same audio input (right).",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 87,
+                            "text": "BU-3DFE face database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Adult data set, regularizer choice did not seem to matter as Random performed equally well to Control .",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 21,
+                            "text": "Adult data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After carefully checking the dataset, we find that some poorly-predicted permission categories indeed hurt the overall statistics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, most existing works in the literature are basically monolingual approaches which are restricted to the use of the original source language of the document collection, without taking advantage of potentially rich semantic information drawn from other languages.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that due to the smaller number of train scans available in the ScanNet dataset, we augment the train scans to have 4 rotations each.",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 83,
+                            "text": "ScanNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For a dataset of M examples, this gives us the sparse filtering objective (Eqn. 1):",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the two datasets, HTTP, SMTP, the training sets built are more than ten times larger than labeling sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 25,
+                            "text": "HTTP",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 27,
+                            "end": 31,
+                            "text": "SMTP",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate the performance of our AutoSVD and AutoSVD++ models on the three public accessible datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate the effectiveness of our model on three well-known datasets for different visual reasoning tasks: question-answering on video data (COG ), compositional reasoning on 3D synthetic images (CLEVR ) as well as diagram question-answering, with real-life figures extracted from textbooks (AI2D ), which is much noisier while having less training data.",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 150,
+                            "text": "COG",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 202,
+                            "end": 207,
+                            "text": "CLEVR",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 298,
+                            "end": 302,
+                            "text": "AI2D",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Caveat Since we use all images within one category for constructing a challenging test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The PHONECALL and DEVICE graphs are constructed using the Reality Mining data sets provided by the MIT Media Lab .",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 82,
+                            "text": "Reality Mining data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There is some work on speeding up bitwise operations for Bit-Funnel , however BitFunnel's collection of mapping matrices is costly in terms of space, and is typically larger than the corresponding inverted index.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For a fair comparison with existing works, we used 25 YUV format benchmark video sequences as our training sets, which have been previously used in .",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 74,
+                            "text": "25 YUV format benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the classification experiments in this section, we derive the two types of data representations (the sparse coding and the local simplex coding) from the points/columns extracted by compared meth- : Classification error rates in percentage (%) on texts (TDT2 and Newsgroups) and handwritten number datasets (MNIST).",
+                    "annotation_spans": [
+                        {
+                            "start": 258,
+                            "end": 262,
+                            "text": "TDT2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 267,
+                            "end": 277,
+                            "text": "Newsgroups",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 312,
+                            "end": 317,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The key idea is to view the significant subgraphs from related source datasets as strong candidatesubgraphs for the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Starting with a list of manually selected seed-words that unambiguously mark agreement or its antonym 2 we extracted related words from embeddings trained on the Google News Corpus",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 180,
+                            "text": "Google News Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This testing dataset contains 1000 images, composed of 50 unique foregrounds and 20 background images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Performance on this validation set was used to determine each MKM's architecture, as described in the previous section, and also to set the kernel width in RBF kernels, following the same methodology as earlier studies .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Across perturbation strategies and datasets, on average AMEN outperforms Flake-ODF by 16%, conductance by 18%, AW-NCut by 20%, SODA by 23%, average degree by 24%, cut ratio by 24%, and OddBall by 25%.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent years have observed some emerging benchmark studies of unconstrained face detection and verification techniques on facial images that are collected from the web, such as the LFW benchmark studies .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recent advances of 3D semantic segmentation  have accomplished promising performance in coarse-level segmentation on the ShapeNet Part dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 142,
+                            "text": "ShapeNet Part dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, on the Kinect dataset, to achieve an accuracy of 0.2, the jungle requires around 3000 nodes whereas the standard forest require around 22000 nodes.",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 34,
+                            "text": "Kinect dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, the proposed model can avoid overfitting: it does not exclusively use the latent variable \u03b8 learned from the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this study, the experiment dataset contains 56,055 artists, 43,086 albums, 1,233,651 songs, 633 genres, 677,275 users, and 305,916 playlists.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The posture dataset was preprocessed before the simulation by using DeepLab-v3  as a human detector to remove the background.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The models had 100D word embeddings and were trained to predict 5 words on both sides of the current word on the 1.5B-word Wikipedia dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 140,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The selection of the appropriate level of dimensionality reduction is dependent on the domain/dataset/query/task in question and requires careful fine-tuning to achieve the best result.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we introduce a dataset to fill this gap called the Gracenote Music Identification 2014 dataset, or GNMID14.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 99,
+                            "text": "Gracenote Music Identification 2014 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 104,
+                            "end": 111,
+                            "text": "GNMID14",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , our proposed MIM model approaches the state-of-the-art results on the standard Moving MNIST dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 113,
+                            "text": "Moving MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, it would be useful to consider principled ways of assessing whether a given dataset has an underlying hierarchical structure, in the same way that topological data analysis  attempts to discover the topologies that underlie datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "i.e., where P i STj (C), 0 \u2264 j < n \u2212 1, is the participation index value of a co-located event set C in a dataset ST j at time slot j.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Within the full ClueWeb dataset, Wikipedia is the main contributor of relevant documents for Web track queries.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 31,
+                            "text": "ClueWeb dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the network we will fine-tune on the saliency dataset takes different inputs (takes additional contour as input) compared to the network we pretrain on the Places2 dataset, when fine-tuning our network, we keep the parameters of all the layers in the pretrained network except the first layer, and randomly initialize the first layers of our image completion module.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 177,
+                            "text": "Places2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unfortunately, constructing a series of increasingly complex VLMMs (for example to enter a model selection phase on a validation set) by varying the construction parameters can be a troublesome task .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "v=1 represents a dataset with n samples and m views, where X v \u2208 R n\u00d7dv is the feature space of the v-th view.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Term weights are used to adjust the relative importance of each unique term found in the Interest Corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 104,
+                            "text": "Interest Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although CUB dataset is for the fine-grained task, where the inner-class variance of the appearance is considered small, that of AADB is large owing to the subjective nature of the task.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 20,
+                            "text": "CUB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the two ITG tasks, the success of Deep LSTM benchmarks relative to their performance in other tasks can be explained by their ability to exploit short local dependencies dominating the longer dependencies in these particular grammars.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If they share many significant subgraphs, the graph datasets are related.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These differences might be due to the limited number of topics available for the TREC 678 collection (20 in total), but also because of the different kind of documents in these collections (newswire articles vs. Web pages).",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 100,
+                            "text": "TREC 678 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each method is evaluated on a standard dataset for image denoising , which consists of 9 colored images corrupted with noise of \u03c3 = 25 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the CIFAR-10-C validation dataset from  and compare the robustness of normally trained classifiers to classifiers trained with an auxiliary rotation prediction loss.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 40,
+                            "text": "CIFAR-10-C validation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each dataset, we perform cross validation, repeating the partition to training and test datasets and keeping the validation set fixed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We carry out the same sampling procedure to create validation sets (10%) and test sets (20%), except that the ratios of irrelevant to relevant are 5, 10, and 20 for food, comoda, and frappe, respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The contributions of this paper are: 1) We provide a mathematically sound neural network which includes the benefits of terms that model repeated measurements, arguably a better fit with the statistical properties of most available gaze datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Number of random RNNs:  shows that increasing the number of random RNNs improves performance, leveling off at around 64 on this dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, the daily top 100 words in the TV data set change periodically because of the influence of the day of the week, as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 52,
+                            "text": "TV data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(3) Extensive experiments on two widely used datasets demonstrate the advantages of our proposal over several state-of-the-art hashing techniques.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also follow the evaluation metrics PSNR, SSIM as well as reprojection errors (RPE) for qualitative evaluation of all rectified fisheye images on our test set, as reported in Tab. 1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, sets of mutational patterns that support different tree topologies are commonly seen in clinical HIV databases.",
+                    "annotation_spans": [
+                        {
+                            "start": 106,
+                            "end": 119,
+                            "text": "HIV databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They exploit a divide-et-impera approach, by subdividing the original dataset into partitions that can be separately loaded and mined in the main memory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "LETOR dataset based on synthetic clicks.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 13,
+                            "text": "LETOR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While datasets are expected to resemble the probability distribution of observations, the data collection procedure can be biased by human and systematic factors, leading to distribution mismatch between dataset and reality, as well as between two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our method is empirically validated by comparing with state-of-the-arts on natural and biomedical datasets with fine-scale structures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The corresponding data sets are denoted by Syn100.D500K.P  and Syn(200).D500K.P6 respectively.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To encode the grammatical function of each token, partof-speech tagging was done for each token within a sentence using the Apache OpenNLP 10 implementation of a max-ent POS tagger, using the Penn English Treebank POS tag dictionary 11 that comprises of 36 tags.",
+                    "annotation_spans": [
+                        {
+                            "start": 192,
+                            "end": 213,
+                            "text": "Penn English Treebank",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have tested a pre-trained VGG-Face model  on the LFW dataset constructed in Section 4.4, and its performance drops to about 1% under 20% mixed noises.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 63,
+                            "text": "LFW dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Empirical results with eight UCI datasets show that the extended level method is able to greatly improve the efficiency of multiple kernel learning in comparison with the SILP method and the SD method.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section we will introduce a new graph based SSL algorithm that can (1) determine the weight on each edge of the graph automatically; (2) handle large scale datasets in a multilevel way.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "report the offline and online performance of all OL2R methods on MQ2007 dataset under perfect, navigational and informational click models.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 79,
+                            "text": "MQ2007 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is designed to mimic the conditions of KITTI dataset and has similar scene layout, camera viewpoint, and image resolution as KITTI dataset, thus making it ideal to study the domain adaptation problems between synthetic and real data.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 55,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 128,
+                            "end": 141,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We had 720 training examples in each of ten digit categories; the test set consisted of 1320 test patterns formed by transforming independent prototypes in all meaningful combinations of the t = 6 transformations (four spatial directions and two rotation senses).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the notation of the InterPro database  we will refer to domains, motifs and other sequence patterns as signatures.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 47,
+                            "text": "InterPro database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Previous work on passagebased retrieval has shown negative or mixed results on AP, TREC-45 and WSJ collections .",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 110,
+                            "text": "AP, TREC-45 and WSJ collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On top of Tab. 2, focusing on the D1-all metric, we can notice how running full backprop online to adapt DispNetC  decimates the number of outliers on all scenarios compared to the model trained on the synthetic dataset only.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to evaluate the candidate methods, data sets with 5 inputs and 1 output were randomly generated.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There is considerable interest in the use of unsupervised clustering methods to discover structure in datasets , with much recent interest in clustering gene expression data .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Model hyperparameters (number of trees, depth, learning rate) were trained with the provided independent validation set for Y!S1, and using 5-fold cross-validation on GOV2.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 128,
+                            "text": "Y!S1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 167,
+                            "end": 171,
+                            "text": "GOV2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used the COCO validation set to test the performance of the LaSO models for the label set intersection, union, and subtraction operations.",
+                    "annotation_spans": [
+                        {
+                            "start": 12,
+                            "end": 31,
+                            "text": "COCO validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the impact of different coding methods on an important end-to-end task, image classification, we selected the VOC 2007 training set for classifier training, the VOC 2007 validation set for hyperparameter selection, and the VOC 2007 test set for for evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 122,
+                            "end": 143,
+                            "text": "VOC 2007 training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 173,
+                            "end": 196,
+                            "text": "VOC 2007 validation set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 235,
+                            "end": 252,
+                            "text": "VOC 2007 test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we investigate the influence of term selection on retrieval performance on the CLEF-IP prior art test collection, using the Description section of the patent query with Language Model (LM) and BM25 scoring functions.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 127,
+                            "text": "CLEF-IP prior art test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SM paper evaluates their convolutional neural network on the popular TrecQA dataset, rst introduced by Wang et al.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 87,
+                            "text": "TrecQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, most existing datasets focus on a specific domain like cooking, which makes them far from many real-world applications where more diverse activities occur.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this case, we are given not only attribute signatures, but also a dataset D T consisting of a small number of images with their class labels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The documents came from the AQUAINT newswire collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 55,
+                            "text": "AQUAINT newswire collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ere are 938, 035 emails from 279 accounts in this email collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Groundtruth landmarks, 3DMM and pose parameters are provided by the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, we get 7.47% error rate using 100 samples in USPS dataset, while using the squared loss the error rate is 10.8% .",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 70,
+                            "text": "USPS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, in order to measure the similarity between two authors in the DBLP dataset, two possible meta paths can be generated as follows:",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 88,
+                            "text": "DBLP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1), for both modalities on Wiki and NUS-WIDE datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 54,
+                            "text": "Wiki and NUS-WIDE datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the 2, 000 masks of the trainings images of Helen Face Dataset as the target mask, and randomly choose face images from CelebA Dataset",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 69,
+                            "text": "Helen Face Dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 127,
+                            "end": 141,
+                            "text": "CelebA Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate these capabilities on three network datasets: a graph of Ph.D. advisor-advisee relationships , a phylogenetic tree expressing genetic heritage  and a biological set representing disease relationships .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An English-German training set contains 30 scanned books  German) from the IA database.",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 86,
+                            "text": "IA database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After performing manual inspection, we concluded that, since the search engine (SE) already sets a very strong baseline for this QA collection (MRR: 71.63% and REC1@1: 59.14%), using features similar to those of 11 http://terrier.org/ 12 http://www-03.ibm.com/innovation/us/watson/index.html 13",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset has a vocabulary size of 3097 words and 1878 possible answers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, our models have the same accuracy as reported in  on the Corel-B dataset, while we have a simpler label random field and use a smaller training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 78,
+                            "text": "Corel-B dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This process iterates until the precision stops to increase over the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, Pearson's correlation coefficient for precision and recall obtained on this subset and on the full dataset is, respectively, 0.9987 and 0.9547 (computed as mean correlation for the models other than GEO).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, on the Berkeley Segmentation Data Set and Benchmarks 500 (BSDS500) , the detection performance has been boosted from 0.598  to 0.815",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 65,
+                            "text": "Berkeley Segmentation Data Set and Benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is found that, with zero knowledge built in, the network is able to learn from scratch to play the entire game at a fairly strong intermediate level of performance, which is clearly better than conventional commercial programs, and which in fact surpasses comparable networks trained on a massive human expert data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By taking into account all the combinations of the two contextual factors, we obtain 6 types of context in the Food dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 111,
+                            "end": 123,
+                            "text": "Food dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The picture is slightly different on MQ-2009 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 55,
+                            "text": "MQ-2009 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on three standard ZSL datasets under both conventional ZSL and generalized ZSL settings.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 53,
+                            "text": "ZSL datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated SVMs with arc-cosine kernels on two challenging data sets of 28 \u00d7 28 grayscale pixel images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(Note that the total number of topics in GRLSI and GNMF is K s + 25 \u00d7 K c , where 25 is the number of classes in the Wikipedia dataset.)",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 134,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Inspired by question detection subtask, we conducted experiments using combinations of features on the two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To facilitate related research and validate our proposed framework, we build a large-scale and high-quality Video based Social Relation dataset, dubbed as ViSR.",
+                    "annotation_spans": [
+                        {
+                            "start": 120,
+                            "end": 143,
+                            "text": "Social Relation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section we compare the robustness of optimization strategies based on R\u00e9nyi divergences of various orders, including least squares fitting (\u03b1 = 2) and information maximization (\u03b1 = 1), as the dataset size decreases and/or neural noise increases.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conjecture that this could be partly explained to the data collection constraints, in that we required for a task to be resumed within one week in order to be marked \"Continued\".",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we compare the performance convergence of  The popularity-based recommendation generally works worse on more long tailed datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At fine-tuning step, we fine-tune all three branches on real datasets including ICDAR2015 , ICDAR2017-RCTW , SCUT-CTW1500 ,",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 89,
+                            "text": "ICDAR2015",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 106,
+                            "text": "ICDAR2017-RCTW",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 109,
+                            "end": 121,
+                            "text": "SCUT-CTW1500",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast to the IOtables dataset, this Enron e-mail dataset is very dynamic, as you can see from , (d).",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 35,
+                            "text": "IOtables dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 42,
+                            "end": 62,
+                            "text": "Enron e-mail dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our experiments, we used a collection based on a crawl of 2.7 million records from the book database of the online bookseller Amazon.com.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to the size of the modern collection of documents, to produce a complete set of relevance judgements is impossible.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the robotics community, this dataset has remained valuable over many years  and is still a reference for the fast synthesis of grasps given known object models .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Left: examples from the rectangles-image data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The pruning technique was more effective on the Enron dataset than on the other two datasets mainly due to two reasons.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 61,
+                            "text": "Enron dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For N-gram, we generated 3,114 N-grams (1-5 grams) from the Ubuntu dataset and 1,604 N-grams from DC dataset for question detection while 2,600 N-grams from Ubuntu dataset and 1,503 N-grams from DC dataset for answer detection.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 74,
+                            "text": "Ubuntu dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 157,
+                            "end": 171,
+                            "text": "Ubuntu dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 195,
+                            "end": 205,
+                            "text": "DC dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 98,
+                            "end": 108,
+                            "text": "DC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(b) shows the average variance for each latent feature in different data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Procedures that achieve less than 0.5% error on other handwritten digit tasks barely achieve less than 4% on this one training set and the test set among two very different populations (census bureau workers for the training set, high-school students for the test set), we therefore report results on the official NIST test set (named \"hard test set\"), and on a subset of the official training set, which we kept aside for test purposes (the \"easy test set\").",
+                    "annotation_spans": [
+                        {
+                            "start": 314,
+                            "end": 327,
+                            "text": "NIST test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For simplicity, for the datasets that are generated randomly, we perform experiments over a single fixed instantiation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Information retrieval test collections traditionally use a combination of automatic and manual runs to create a pool of documents to be judged.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On popular benchmark data sets, a significant improvement over state-of-the-art methods in tasks of typical and generalized zero-shot learning verifies the generalization ability of latent features for recognizing unseen images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The predictors in ADNI, Yahoo-Computers and Olivetti data sets are highly correlated as indicated by the average \u03bb max .",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 62,
+                            "text": " ADNI, Yahoo-Computers and Olivetti data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "WikiVote dataset : The nodes in this graph correspond to users and the edges to users' votes in the election to being promoted to certain levels of Wikipedia adminship.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 16,
+                            "text": "WikiVote dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed approach dramatically improves the performance of the Jelinek-Mercer smoothing language model on a number of metrics; a combination of our approach and the Jelinek-Mercer smoothing approach can outperform the Dirichlet smoothing approach on all five collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We validate the proposed BridgeNet for age estimation on three challenging datasets: MORPH Album II , FG-NET , and Chalearn LAP 2015 datasets , and the experimental results demonstrate that our approach outperforms the state-of-the-art methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 141,
+                            "text": "MORPH Album II , FG-NET , and Chalearn LAP 2015 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use three datasets: (1) Animals with Attributes (AwA)  (M = 85 attributes, K = 10 unseen classes, 30,475 total images), (2) aPascal/aYahoo objects (aPY)  (M = 65, K = 12, 15,339 images) (3) SUN scene attributes (SUN)  (M = 102, K = 10, 14,340 images).",
+                    "annotation_spans": [
+                        {
+                            "start": 27,
+                            "end": 56,
+                            "text": "Animals with Attributes (AwA)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 127,
+                            "end": 155,
+                            "text": "aPascal/aYahoo objects (aPY)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 193,
+                            "end": 219,
+                            "text": "SUN scene attributes (SUN)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The kernel width of MNB-W and MNB-S was h = 10,000, which was the best value for the TV data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 96,
+                            "text": "TV data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each collection, we randomly divided the topics into five partitions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All datasets come with ground-truth correspondence; for cross-dataset experiments, the ground-truth is estimated using the shape registration method FARM .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After this convex hull replacement operation, a data item in the test dataset may not be contained in any polytope, and so to conduct posterior inference we augment the training dataset with a version of the testing dataset in which the label is missing, and then marginalise the missing label in the likelihood described in Section 2.1.3. ) can be computed exactly .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "LOD data sets are typically referenced online (e.g., on CKAN 11 ) and can be queried or crawled over HTTP using the SPARQL 12 declarative query language.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 13,
+                            "text": "LOD data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 56,
+                            "end": 63,
+                            "text": "CKAN 11",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the MB2013 collection has a much larger percentage of total relevant documents that are unique.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "MB2013 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compared on DukeMTMC and MOT15 against methods that ignore appearance features because their re- sults are reported on these two datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 23,
+                            "text": "DukeMTMC",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 28,
+                            "end": 33,
+                            "text": "MOT15",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Quantitative results  shows the multi-class classification accuracy on all three datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second scoring technique assigns sm(p) to a phrase p which is based on the mutual information (MI) between the terms of phrase p and the idf values from the background corpus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the OHSUMED test collection, a subset of the MEDLINE database, which is a bibliographic database of important, peer-reviewed medical literature maintained by the National Library of Medicine.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 27,
+                            "text": "OHSUMED test collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 45,
+                            "end": 61,
+                            "text": "MEDLINE database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The OTR tracker is extensively evaluated on two challenging RGB-D tracking benchmarks and compared to 12 state-of-the-art RGB-D trackers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset consists of sequences of length 10 of multiple moving MNIST digits.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on various TREC datasets show that our hybrid model consistently obtains better results over the best TREC systems.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 45,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, the JCD is tested on NISTER  and Wang databases which contain natural color images, the BTDH on the IRMA database consisting of grayscale medical radiology images, and the SpCD",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 126,
+                            "text": "IRMA database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 34,
+                            "end": 60,
+                            "text": "NISTER  and Wang databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ImageNet 2012 dataset  comprises 1.28 million training images and 50K validation images from 1,000 classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "ImageNet 2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ") Performances of five detectors using HOG features trained without boosting and tested on the INRIA dataset: LDA detectors learned by Gibbs Sampling and Mean Field, Vector Quantization, Non-negative Matrix Factorization -all with K = 24 components/codewords -and Linear SVM.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 108,
+                            "text": "INRIA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the MIRFLICKR-25K photo collection, ii.)",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 34,
+                            "text": "MIRFLICKR-25K photo collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the CQA-QL-2015 and the CQA-QL-2016 corpora .",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 50,
+                            "text": "CQA-QL-2015 and the CQA-QL-2016 corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 5.1, we show the loss curve of LGL is more stable than the SPL algorithm when the model or data distributions vary; In Section 5.2, we show the validation accuracy of LGL outperforms the baseline and SPL-based algorithms on the CIFAR-10 and CIFAR-100 dataset; In Section 5.3, we compare different selection strategies of LGL; In Section 5.4, we validate LGL on the ImageNet dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 239,
+                            "end": 269,
+                            "text": "CIFAR-10 and CIFAR-100 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 376,
+                            "end": 392,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thanks to the rapid development of deep neural networks and the high quality of large-scale real-world datasets, researchers have achieved inspiring progress in a range of vision-language tasks, including visual relation detection , image captioning , referring expression grounding , and visual question answering (VQA) .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It starts with the singleton code table (line 1), and a candidate collection F of frequent itemsets up to a given minsup.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The modern literature on the effectiveness and reliability of high-recall retrieval is largely confined to the problem of constructing test collections for IR evaluation, and eDiscovery in legal matters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, a commonly used benchmark is the Of-fice+Caltech dataset  where adaptation between images taken with a DSLR in an office and product images taken from Amazon is attempted.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 69,
+                            "text": "Of-fice+Caltech dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Jester-3 dataset we used 5 randomly chosen ratings for each user for training, 5 randomly chosen rating for validation and the remaining for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 24,
+                            "text": "Jester-3 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Apply clustering method M to this new dataset to obtain the corresponding set of partitionings {P r k } for k = k min through k = k max and, from these, the corresponding set of quality measures {Q(P r k )}.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we will evaluate the effectiveness of different time-aware ranking methods: LMT , LMTU , TS , TSU , and FuzzySet  using the same dataset, and we will give a brief discussion of the evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also performed an ablation study on the Cityscapes dataset in , following the evaluation procedure proposed in .",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 61,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A more interesting application was obtained on well known gene expression data, the Colon cancer data set provided by .This",
+                    "annotation_spans": [
+                        {
+                            "start": 84,
+                            "end": 105,
+                            "text": "Colon cancer data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It has also the feature to use meta-knowledge to initialize  the search i.e. that it first evaluates those algorithm/hyperparameter configuration pairs that have been good on many other data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The field of computational social science and data-driven research is growing in importance , and with this trend, there is a need for common academic benchmarking collections to facilitate a robust and reproducible research environment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To make a fair and direct comparison with existing unsupervised models, we adopt the Network-In-Network (NIN) architecture that has shown competitive performance previously on the CIFAR-10 dataset for the unsupervised learning task .",
+                    "annotation_spans": [
+                        {
+                            "start": 180,
+                            "end": 196,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also perform comparisons on the VOT2016 dataset  which contains 60 challenging sequences.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 50,
+                            "text": "VOT2016 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, in the family of datasets DB Exp n = [r 1 , . .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As misspelling is common in the web documents and the cut-off is set low in processing these snapshots, the web N-gram dataset appears to be a valuable resource to discover common patterns of misspelling in a large scale.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we conduct the ablation studies on the Channel Shuffle Module and the Attention Residual Bottleneck on the COCO minival dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 144,
+                            "text": "COCO minival dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "That is because two models may rank the test triple (h, r , t) to be second, when the first model ranks a correct triple (e.g., from train or validation set)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, this would lead to combinatorial explosion and it is obvious that brute force approaches like this do not apply to large data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For evaluation and training, all answers that were not the highest voted were removed from the collection to reduce label noise during training and provide a better judgment of performance during evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparison of state-of-the-art performance on existing datasets for video analysis.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Third, we see a drop in error when we add COCO to the training data, which suggests that our framework can take advantage of this dataset with only 2D human pose annotation for 3D pose estimation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "gives the classification results on Emotions data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 53,
+                            "text": "Emotions data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All occurrences of the words in a test collection are replaced with a token that represents the pseudo-word (e.g. replacing \"banana\" and \"magazine\" with \"banana#magazine\").",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments are conducted on large-scale ImageNet-1K and Places365 datasets, and the results demonstrate our 3G-Net outperforms its counterparts while achieving very competitive performance to state-of-the-arts.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 79,
+                            "text": "ImageNet-1K and Places365 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The sample size of the remaining complete dataset is 7042.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, take ACM DL corpus as an example, for 41,378 publications, we only need to collect OERs for 9,263 popular author assigned keywords (appear at least 5 times in the corpus).",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 32,
+                            "text": "ACM DL corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After the training phase, the validation dataset is used for selecting the best parameter setting for each method, such as the number of weak learners of FRank and RankBoost, and the number of epochs of RankNet.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct an ablation studies using the DAVIS-2017 validation set to validate the effectiveness of our feature aggregation module and training scheme.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 66,
+                            "text": "DAVIS-2017 validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, one can represent the relation between the books and the readers (users) on the Amazon dataset with a hypergraph.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 107,
+                            "text": "Amazon dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the MSN dataset which is considered harder since it has more misspelled queries, gHMM also achieves high precision of 0.910 and recall of 0.966, which are both significantly better than that of the Lueck-2011 (0.896 and 0.921).",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 18,
+                            "text": "MSN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The English test set of CLEF-IP 2010 corresponds to 1348 topics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Fewer methods have reported results on this newer dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we show that DGAN obtains competitive results on the benchmark datasets MNIST, CIFAR10, CIFAR100, and CelebA (at resolution 128 \u21e5 128).",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 94,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 96,
+                            "end": 103,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 113,
+                            "text": "CIFAR100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 125,
+                            "text": "CelebA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(We omitted ABOD for the ALOI data set, where it did not finish in reasonable time.)",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 38,
+                            "text": "ALOI data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our proposal has been validated by means of user studies and lab experiments using MovieLens dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 100,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the object, attribute, and relationship annotations are very noisy in VG dataset, we filter them by keeping the objects, attributes, and relationships which appear more than 2, 000 times in the training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 86,
+                            "text": "VG dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These references appear to be representative of the current state-of-the-art for deep and shallow architectures on these data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "a. Obtain the k-cluster partition P k 0 of the dataset D using method M. b. Compute",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Qualitative examination of the topics discovered using USTM framework in the two datasets collected from popular online consumer review platforms as well as quantitative evaluation of the methods utilizing those topics for the tasks of review sentiment classification and user attribute prediction both indicate the utility of accounting for demographic information of review authors in opinion mining.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset consists of scene text images which come from 9 languages.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The effectiveness of muSSP is evidenced by forty experiments on five MOT datasets combined with three widely used graph design methods .",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 81,
+                            "text": "MOT datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on sparse datasets are acquired using the Cori supercomputer at NERSC.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated our algorithm on MovieLens dataset containing 100,000 ratings for 1,682 movies by 943 users, which  In order to compare our algorithm with other promising graph based approaches, we chose the degree of agreement (DOA) as the performance measure.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 47,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We regard original images in the dataset as being in default orientation and label them as positive examples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", i.e., w \u22a5 1 = w, u 1 u 1 , u 2 denotes the remaining orthogonal component of w, and w denotes the component tangent to v. Shape variability of corpus callosum data: As a demonstration of PPGA on Kendall shape space, we applied it to corpus callosum shape data derived from the OASIS database (www. oasis-brains.org).",
+                    "annotation_spans": [
+                        {
+                            "start": 279,
+                            "end": 293,
+                            "text": "OASIS database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have shown how exploitating such partitioning technique requires a double scan of the dataset to collect enough information to decide how to subdivide it in order to obtain projected partitions that fit the available memory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset contains a lot of users with just a few sessions (which is also true in general) and thus such models do not have good generalization properties.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This allows us to easily annotate a large-scale LDR panorama dataset  with lighting parameters and subsequently infer lighting from a single image of a generic outdoor scene in an end-to-end framework.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 68,
+                            "text": "LDR panorama dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, the challenge cannot be construed as a traditional clustering problem; simply clustering images by their appearance is prone to reshaping datasets into per-category domains, as observed in  and our own empirical studies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ttest results in  proves again that angle is indeed the most suitable metric for GAK-means to cluster the la1 data set, especially in terms of CE.",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 122,
+                            "text": "la1 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Kunze and Auer  introduce dataset retrieval as a specialization of information retrieval, however, they restrict their scope to the process of returning relevant RDF datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 174,
+                            "text": "RDF datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Visual reasoning The research efforts towards VQA models that are able to reason about a visual scene is mainly conducted using the CLEVR dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 132,
+                            "end": 145,
+                            "text": "CLEVR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, it shows that the initial choice of two clusters was well justified for this data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our task was to compute the anomaly scores of individual variables, rather than simply detecting that two data sets are different.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the the SA+LearningW method does not work well as the RM+Oracle method on AP and WSJ datasets, there is a smaller gap between them.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 102,
+                            "text": "AP and WSJ datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Different from classical datasets like MSCOCO , VQA  or Visual Dialog , this new dataset contains short video clips, the corresponding audio stream and a sequence of question-answer pairs.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 45,
+                            "text": "MSCOCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 48,
+                            "end": 51,
+                            "text": "VQA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 56,
+                            "end": 69,
+                            "text": "Visual Dialog",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Cormorant significantly outperforms competing algorithms in learning molecular Potential Energy Surfaces from conformational geometries in the MD-17 dataset, and is competitive with other methods at learning geometric, energetic, electronic, and thermodynamic properties of molecules on the GDB-9 dataset. .",
+                    "annotation_spans": [
+                        {
+                            "start": 143,
+                            "end": 156,
+                            "text": "MD-17 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 291,
+                            "end": 304,
+                            "text": "GDB-9 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The E. Coli Promoters data set was obtained from the UCI machine learning repository .",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 84,
+                            "text": "UCI machine learning repository",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 4,
+                            "end": 30,
+                            "text": "E. Coli Promoters data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": There has been much less work on positional indexes for versioned collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The normalizing distribution x is now the distribution of the feature responses of layer F 1 on the target dataset T .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiment with the landmark dataset in  used a regularization parameter value of = 0.556, while the experiments with the NIPS and synthetic datasets used a value of = 1.",
+                    "annotation_spans": [
+                        {
+                            "start": 126,
+                            "end": 153,
+                            "text": "NIPS and synthetic datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Book Crossing (BX) dataset is extracted from Book Crossing data",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 26,
+                            "text": "Book Crossing (BX) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The task of extracting and ranking sentiment spikes' triggers requires a dataset that spans over several months.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While visual grounding in static images has witnessed great progress , visual grounding in videos is still challenging-first, a video contains many frames, which induces the temporal visual-language alignment problem that is unique to video grounding; second, despite rich source of online videos, constructing a largescale video dataset with grounding annotation is expensive and time-consuming.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to related algorithms like ION, it produces very concise and easily interpretable output, and does not suffer from the inability to handle any differences in observed dependencies between data sets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We particularly focus on application domains where the datasets are stored as flat files, and not on top of a relational database.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation conducted on a standardized dataset of geolocated listening events showed that music recommendations based on a combination of collaborative filtering (CF) and",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It indicates the proposed method is efficient on large-scale dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use standard classification and regression datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the fitting error over time on the sparse ChicagoCrime tensor using two forgetting factors, \u00b5. We now use relative error for the sparse dataset, as it lends a more interpretable result.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Aspect Detection: This task aims to discover the K ratable product aspects a k evaluated in the collection of reviews R, typically by clustering the synonymous or semantically related keywords (aspect terms or opinion words) appearing in the reviews.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The real datasets contain many different indoor and outdoor scenes captured with various rotations and translations of the DAVIS camera.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The bottom row shows the learned classifier's discriminative weights w k for each category, which can be used to gain a sense for how the cluster's members differ from the dataset mean waveform.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, writing structured queries requires extensive experience in query language and data model, and good understanding of particular datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is usually above Co Rank (nDCG di erence between 0.01 and 0.08), with the exception of the Yahoo dataset for train sizes 20 and 50",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 107,
+                            "text": "Yahoo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each 3D point in these datasets is at a visually-distinctive location and is augmented with an image feature, simplifying the correspondence problem.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our methodology on visual recognition tasks where CNNs have proven to perform well, e.g., digit recognition with the MNIST dataset, and the more challenging CIFAR-10 and STL-10 datasets, where our accuracy is competitive with the state of the art.",
+                    "annotation_spans": [
+                        {
+                            "start": 129,
+                            "end": 142,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 169,
+                            "end": 197,
+                            "text": "CIFAR-10 and STL-10 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To better understand this, we visualize the full graph, the training graph of the Facebook dataset together with the predictions made by each model in .",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 98,
+                            "text": "Facebook dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Wiki dataset, we define that a target document d is relevant to a query q if d and q belong to the same semantic class.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 20,
+                            "text": "Wiki dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to study the effectiveness of our methods, we evaluate them on a collection of more than 200 instances from the literature.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the validation set of VisDial v1.0, we observed that: 1) 56% of question-guided visual attention and 89% of recursive attention are reasonable for ambiguous questions; 2) 62% of dialogs require at least one accurate co-reference resolution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show results using the OHSUMED dataset for 3 different variants and obtain higher performance, up to 12.5% in Mean Average Precision (MAP).",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 41,
+                            "text": "OHSUMED dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(there are 18 genres in the MovieLens dataset).",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 45,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , we evaluate the face recognition performance on our normalized images of Multi-PIE database.",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 105,
+                            "text": "Multi-PIE database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The parameter \u03b1 is selected experimentally to achieve maximum CLL on DevSet2 and P (y) refers to the prior over the labels calculated directly from their relative proportions in the training set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments have been conducted to analyze the performance of GHF-ART on two heterogeneous social network data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "K dataset contains 10,000 queries samples at random from the previous.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the performances of our model and its variants in the benchmark data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In view of the importance of RGB-D for detection-based approaches, it is highly demanded to have a large-scale RGB-D crowd counting dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The entire TREC dataset was used for evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 23,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on Clickture Dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 28,
+                            "text": "Clickture Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Annotated with texts and temporal boundaries of a series of steps to complete different tasks, these datasets have provided good benchmarks for preliminary research.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly to the Yelp review dataset, we also constructed 2 datasets -one full score prediction and another polarity prediction.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 36,
+                            "text": "Yelp review dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, in  : The comparison of the number of closed association rules and the total number of rules discovered in the trains dataset, using the three different methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a second step, we test the proposed model against the strong non bag-of-words MRF model, which has shown to be highly effective especially for large scale web collections .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed model achieves the state-of-the-art results on both NTU RGB+D dataset and Northwestern-UCLA dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 82,
+                            "text": "NTU RGB+D dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 87,
+                            "end": 112,
+                            "text": "Northwestern-UCLA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 2 will briefly outline these methods and their theoretical foundations, while Section 3 will empirically evaluate the robustness of these methods using synthetic data sets that increasingly violate some of the statistical assumptions of the techniques.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Pima Indians Diabetes (Pima), Haberman's Survival (Haberman), Wisconsin Breast Cancer (Wisconsin), Computer Hardware (CPU), and Auto MPG data sets have been taken from the UCI machine learning repository .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 150,
+                            "text": "Pima Indians Diabetes (Pima), Haberman's Survival (Haberman), Wisconsin Breast Cancer (Wisconsin), Computer Hardware (CPU), and Auto MPG data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 176,
+                            "end": 207,
+                            "text": "UCI machine learning repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, large data sets (like graphs, trees, and text collections) can be manipulated in main memory, avoiding the secondary storage.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "proposed different CNN-based architectures for gait recognition and produced state-of-the-art recognition accuracy on CASIA-B dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 118,
+                            "end": 133,
+                            "text": "CASIA-B dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next we trained the full model on the same 'p'-dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "used a large scale dataset from a Chinese retailer, which was originally designed for a recommendation task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "And the New York Times news corpus contains business news mentioning selected sets of companies.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 34,
+                            "text": "New York Times news corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "During training after each epoch we evaluate our performance on the validation set using a perplexity metric.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this context, the GLUE benchmark  has become a prominent evaluation framework for research towards general-purpose language understanding technologies.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 35,
+                            "text": "GLUE benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The papers from the Image Search, Indexing, Retrieval Models, Test Collections, and Web Queries sessions all used public datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "and  show that the proposed new algorithm works better than the standard EM algorithm on the Netflix and MovieLens data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 124,
+                            "text": "Netflix and MovieLens data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "COCO is the largest dataset that meets both of these requirements, so all of our models are trained end-toend on the COCOPersons training set with the annotations of pose keypoints and segmentation masks.",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 141,
+                            "text": "COCOPersons training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 0,
+                            "end": 4,
+                            "text": "COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "L. We focus on real-world data from the popular Movielens dataset (https://grouplens. org/datasets/movielens/) for our experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 65,
+                            "text": "Movielens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We assume that the key storage bottleneck is the probability density function for each attribute in the uncertain data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use Deeplearning4j's word2vec tool and word embeddings vectors trained on Wikipedia and the Gigaword corpus 7 and select words with common stems in the top M = 20 for each query term t.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 110,
+                            "text": "Wikipedia and the Gigaword corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Despite the fact that PointNet  is able to achieve high accuracy on the ModelNet40 test set, we show that by exploiting the low cardinality of the induced critical point set we can cause the network to misclassify a man wearing a winter coat and beanie as a plant after only 32 out of 2048 points have been removed from the point cloud.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 91,
+                            "text": "ModelNet40 test set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Among them, MAR and MNAR may introduce wrong causal conclusions if one simply deletes the data with missing entries, and applies causal discovery algorithms to the deleted complete dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we plotted the execution times, the number of partitions, and the amount of memory actually used by DCI CLOSED OOC for mining dataset Webdocs as a function of the memory threshold imposed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The parameters of our methods were also tuned on the validation datasets; according to the results, the number of weak learners t and the combination coefficient \u03bb were set to 7 and 0.07 for NTCIR3 as well as 13 and 0.01 for NTCIR4.  )",
+                    "annotation_spans": [
+                        {
+                            "start": 191,
+                            "end": 197,
+                            "text": "NTCIR3",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 225,
+                            "end": 231,
+                            "text": "NTCIR4",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct extensive evaluations on the CUHK03-NP, DukeMTMC-ReID, and Market-1501 datasets and report competitive performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 49,
+                            "text": "CUHK03-NP",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 51,
+                            "end": 64,
+                            "text": "DukeMTMC-ReID",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 70,
+                            "end": 90,
+                            "text": "Market-1501 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For future work we expect to further improve upon these results by increasing the complexity and expressiveness of our simple spatial model (especially for unconstrained datasets like LSP).",
+                    "annotation_spans": [
+                        {
+                            "start": 184,
+                            "end": 187,
+                            "text": "LSP",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The kernel has been tested on five datasets: Mutag , CAS 2 , CPDB , AIDS  and NCI1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 50,
+                            "text": "Mutag",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 53,
+                            "end": 58,
+                            "text": "CAS 2",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 61,
+                            "end": 65,
+                            "text": "CPDB",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 68,
+                            "end": 72,
+                            "text": "AIDS",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 78,
+                            "end": 82,
+                            "text": "NCI1",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we describe the Lattes Expertise Retrieval (LExR) test collection for research on academic expertise retrieval.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 80,
+                            "text": "Lattes Expertise Retrieval (LExR) test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The video caption datasets are annotated with descried sentences or phrases, which can be based on either a trimmed video  or different segments of a long video .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When we conduct the transfer learning on the target datasets of the target tasks, we follow the same hyperparameter settings of Hara et al. ,",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Such term correlation data is less sparse and more stable with the increase of the collection size, and can well capture the necessary information for topic learning.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In similar vein as the cross topic evaluation, the incorporation of the adversarial signal significantly increases performance on the held out collections in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We measure the generalizability of our W-RPN proposals across different network architectures, weaklysupervised approaches, and datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the NIPS data set, there is a slight perplexity degradation, which is partially due to non-optimal parameter settings for \u03b1 and \u03b3.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "NIPS data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use CNN's public Facebook page, one of the largest newsgroups on Facebook, as our data set and collect all its information, including the content of comments, user-like information, and their time stamps.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we first describe the data sets used in this paper.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One challenge of the Cambridge Landmarks and 7 Scenes datasets is that there are significant differences in pose between the training and test images.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 62,
+                            "text": "Cambridge Landmarks and 7 Scenes datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Caltech-101 dataset, we consider seven kinds of image descriptors that result in the seven base kernels (denoted below in bold and in abbreviation):",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 27,
+                            "text": "Caltech-101 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For CRLSI and CNMF, we also trained the models on Web-I dataset under the same parameter settings in Section 7.1, except parameter K s , as there exists no shared topic in CRLSI and CNMF.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 63,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "O, which is a collection of objects { Io, Fo, Yo }, where both Io and Yo are sets of tags associated with object o.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that COCO classes also include animals, which are not relevant for the tasks in our dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our final set of experiments used the data from the Graz dataset 3 , as well as the dataset proposed in .",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 64,
+                            "text": "Graz dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While prior work has largely focused on specific techniques for query reformulation, in Section 3, we first build an oracular query formed from known relevance judgments for the CLEP-IP 2010 prior art test collection  in an attempt to derive an upper bound on performance of standard Okapi BM25 and Language Models (LM) retrieval algorithms for this task.",
+                    "annotation_spans": [
+                        {
+                            "start": 178,
+                            "end": 216,
+                            "text": "CLEP-IP 2010 prior art test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have the input document cluster as DI , and the background corpus as DB which is all the other clusters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show depth predictions for images (and cropped/resized versions) from the NYUv2, KITTI and MegaDepth test sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 110,
+                            "text": "NYUv2, KITTI and MegaDepth test sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Next, we test our method on reconstructing objects from videos of two popular video segmentation datasets: VSB100 , that contains videos uploaded to Youtube, and Moseg , that contains videos from Hollywood movies.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 113,
+                            "text": "VSB100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 162,
+                            "end": 167,
+                            "text": "Moseg",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": 10 most frequent closed patterns in Pioneer dataset 2.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 55,
+                            "text": "Pioneer dataset 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental results on the evaluation dataset showed that the proposed methods outperformed other methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "3.2 and removing non-detected face poses with extreme lighting conditions, we finally obtained about 148K, 48K, 295K frames for training, validation and testing sets on the CASIA-SURF dataset, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 173,
+                            "end": 191,
+                            "text": "CASIA-SURF dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Instead, we apply our \"overlap-multiresolution partitioning\" algorithm , generalizing this method from two-dimensional to d-dimensional datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate on standard NER datasets if they are available.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 36,
+                            "text": "NER datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Queries in each collection were randomly partitioned.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A new dataset consists of 92 video clips for imitation filming.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test our algorithm on the UCF 50 action database , which includes 50 categories of 6,680 human action videos from YouTube.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 51,
+                            "text": "UCF 50 action database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our model on CLEF-IP 2010 collection and we report significant improvement over the strong CLEF-IP baselines.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 48,
+                            "text": "CLEF-IP 2010 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to evaluate the methods, we used the topic set and W3C corpus provided by NIST in context of the Discussion Search 2006 task.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 70,
+                            "text": "W3C corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we show qualitative results of part of our task graph prediction on the JIGSAWS dataset in .",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 100,
+                            "text": "JIGSAWS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "in the three datasets, which validates the effectiveness of our method for localization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that for this dataset, the data is not memory resident even on 8 nodes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Music as positive feedbacks, and Last.fm itself is an implicit feedback dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the PhotoShop-battle dataset  is also used, because it is large (total 102,028 samples) and diverse (contributed from 31,272 online artists), and it reflects the level of real-life image manipulation.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 41,
+                            "text": "PhotoShop-battle dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also find that GES benefits relatively more than other methods from such property of the simulated dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Lower values of \u03bd make the datasets more heteroscedastic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this scenario, large text collections are usually partitioned over a number of nodes in a cluster, such that each partition fits into the memory of its node.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, one reason may be that some methods are trained using datasets which are of small scale and/or indistinctive variations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider a version of the Snelson dataset  where, to assess the 'in-between' uncertainty, we remove the data points between x = 1.5 and x = 3.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 44,
+                            "text": "Snelson dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Accuracy (Ac): the ratio of the number of correctly classified data points to the total number of data points in the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the experiments with the automatic content extraction (ACE) corpus and a Korean news corpus, the proposed model outperformed the previous systems based on SVM tree kernels even though it used more shallow linguistic knowledge.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compute for each MANO vertex how often across the dataset it is in the immediate vicinity of the object (defined as less than 3mm away from the object's surface).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The program was evaluated and optimized by scanning the NBRF protein databases (PROTEIN and NEW) version 19.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 107,
+                            "text": "NBRF protein databases (PROTEIN and NEW) version 19",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, we generate a collection of m randomized datasets from our original dataset, in which independent random permutations are applied to the individual components of the attribute vector associated with each object.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "reports the results of the experiments on the Opinosis data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 63,
+                            "text": "Opinosis data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Madry, Makelov, Schmidt, et al.  used adversarial training on the cifar dataset, which still has the best empirical robustness to attack  and has been repeatedly validated as effective and capable of fully defending against the best known adversaries under the whitebox threat model .",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 79,
+                            "text": "cifar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also evaluated the quality of the document representation h(v) learned by DocNADE in an information retrieval task using the 20 Newsgroups data set and its label information.",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 150,
+                            "text": "20 Newsgroups data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used Lucene to index the English subset of the CLEF-IP 2010 dataset 1 that contains 2.6 million patent documents and a subset of 1281 topics (queries) in the English test set where we determined at least one valid, relevant English document was available.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 70,
+                            "text": "CLEF-IP 2010 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have chosen the 'Billion Triples Challenge' 2009 data set, a data-set created for the Semantic Web Challenge 4 in 2009 and which is well-known in the community.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 60,
+                            "text": "'Billion Triples Challenge' 2009 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly to , the input images are down-sampled to a resolution of 512 \u00d7 256 from the original sizes which are 1226\u00d7370 for the KITTI dataset and for CityScapes.",
+                    "annotation_spans": [
+                        {
+                            "start": 129,
+                            "end": 142,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 151,
+                            "end": 161,
+                            "text": "CityScapes",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Kissing actions is more challenging compared with other action classes in this dataset due to less motion and appearance cues.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similarly, in image annotation tasks, one image can contain multiple objects or tags, and researchers in computer vision are interested in automatically predicting tags/objects for unlabeled image collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to a study by Baruah et al. , the total assessment time spent to create the 2013 and 2014 TREC-TS test collections was around 375 hours, where over 80% of that time was spent on nugget matching.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 124,
+                            "text": "2013 and 2014 TREC-TS test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiment results on six widely used benchmark datasets, i.e., MNIST, CIFAR10, SHREC13, SHREC14, ModelNet10 and Model-Net40, demonstrate the superiority of the proposed method.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 69,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 71,
+                            "end": 78,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 80,
+                            "end": 87,
+                            "text": "SHREC13",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 96,
+                            "text": "SHREC14",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 98,
+                            "end": 108,
+                            "text": "ModelNet10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 113,
+                            "end": 124,
+                            "text": "Model-Net40",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have developed a practical inference approach based on Markov chain Monte Carlo and demonstrated it on two real-world data sets in different domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a dataset and a minimum support threshold, the amount of memory required for mining frequent closed itemsets depends, of course, on the specific algorithm exploited.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results: We now present preliminary experimental results on our data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To this end, we focus on the Human3.6M dataset and we evaluate the regressed shape through 3D pose accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 46,
+                            "text": "Human3.6M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "excluded the mushroom dataset as even very simple logical rules achieve > 99% accuracy on this dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 29,
+                            "text": "mushroom dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on UCI Datasets and Gene Expression Datasets..",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 27,
+                            "text": "UCI Datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 32,
+                            "end": 56,
+                            "text": "Gene Expression Datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Web page collection, we also find that many pages are attacked by hackers, and a large number of links are added in invisible blocks in these pages.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 26,
+                            "text": "Web page collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The SVHN dataset has 73,257 32 \u00d7 32 color digit images in the training set and 26,032 images in the test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "SVHN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1) Compared with polynomial filters, DFNet improves upon GCN (which performs best among the models using polynomial filters) by a margin of 3.7%, 3.9%, 5.3% and 2.3% on the datasets Cora, Citeseer, Pubmed and NELL, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 183,
+                            "end": 187,
+                            "text": "Cora",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 189,
+                            "end": 197,
+                            "text": "Citeseer",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 210,
+                            "end": 214,
+                            "text": "NELL",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 199,
+                            "end": 205,
+                            "text": "Pubmed",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Wikipedia dataset based on ClueWeb, the proposed model also consistently outperforms SFI and LR.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 25,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 35,
+                            "end": 42,
+                            "text": "ClueWeb",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We start our evaluation by analyzing the impact of unlabeled data on mean average uninterpolated precision (mAUP) and mean precision@50 (mT9P) for a fixed number of labeled and unlabeled examples in the training set of each topic in Ohsumed and Reuters datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 233,
+                            "end": 261,
+                            "text": "Ohsumed and Reuters datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our COIN is relevant to the above mentioned datasets, as it requires to localize the temporal boundaries of important steps corresponding to a task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition to presenting a thorough theoretical analysis, we have evaluated our approach on both synthetic and real data (images from the LabelMe database  represented by high-dimensional GIST descriptors ) and compared its performance to that of spectral hashing.",
+                    "annotation_spans": [
+                        {
+                            "start": 139,
+                            "end": 155,
+                            "text": "LabelMe database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are totally 30 categories of mobile apps in our dataset (denoted as z =",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the MMTD dataset contains a lot of users with too few listening events to make reliable recommendations, we consider only users with at least five listening events by dif-ferent artists.",
+                    "annotation_spans": [
+                        {
+                            "start": 10,
+                            "end": 22,
+                            "text": "MMTD dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The validation set is a small subset of exemplars which is excluded from training and used for bias correction alone.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We are given a factual dataset P = {(x,\u0177 f )}, with paired image x along with its corresponding factual caption\u0177 f , and a collection of unpaired stylized sentences P u = {(\u0177 s , s)}, s \u2208 {s 1 ...s k } containing captions of k distinctive styles, where\u0177 s denote a stylized caption with style s.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both the datasets, DS1 and DS2 we notice that the difference in run times between the equal and unequal datasets for a larger w-mer size (w = 7) is greater than for a smaller w-mer size.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 34,
+                            "text": "DS1 and DS2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MAP scores on Wiki-Potd data set is generally lower than that on NUS-WIDE even if the Wiki-Potd data set contains rich textual information.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 36,
+                            "text": "Wiki-Potd data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 69,
+                            "end": 77,
+                            "text": "NUS-WIDE",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 90,
+                            "end": 108,
+                            "text": "Wiki-Potd data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used new partitions for the above data sets proposed by Xian et al.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We adopt ResNet-18/34/50 as reference architectures, and we gather 30,000+45,000 images from an auxiliary dataset  and the ImageNet validation set to train them from scratch.",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 146,
+                            "text": "ImageNet validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that any binary or categorical dataset can be trivially converted into a transaction database.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Such extraction takes about 100 ms on our GPU (30 days for the 20M collection).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They are not favored in a rotation agnostic image dataset like plankton  and ISBI 2012 electron microscopy segmentation challenge .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To facilitate the performance evaluation of data-driven approaches for crowd counting, we introduce a large-scale RGB-D dataset named Shang-haiTechRGBD that contains 2,193 images with 144,512 annotated head counts.",
+                    "annotation_spans": [
+                        {
+                            "start": 134,
+                            "end": 151,
+                            "text": "Shang-haiTechRGBD",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that we   , CDbw is the only competitor that finds some arbitrarily shaped structure in the datasets, although it has some flaws.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We operate on a collection D of versioned documents.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Office-Home  is a challenging domain adaptation dataset, crawled through several search engines and online image directories.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 11,
+                            "text": "Office-Home",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first provide information about the dataset, the baselines and the evaluation metrics before presenting our results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Contrary to this, in the Forest Cover Type dataset, instances are arriving from more than one class at each time point.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 50,
+                            "text": "Forest Cover Type dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Blogs06, ClueWeb09-T09B, and Chinese corpus SogouT2.0, we get similar results.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 56,
+                            "text": "Blogs06, ClueWeb09-T09B, and Chinese corpus SogouT2.0",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The details of these three datasets are as follows:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply our proposed model to simulated data and several gene expression datasets, and demonstrate its effectiveness compared to other modern machine learning methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Another important direction is the reduction of the fundamental computational complexity O ( n 2 ) , which is prohibitive in the huge datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the original dataset contains a large number of records, we divide it into 10 consecutive windows, w0, ..., w9, each of which contains one-tenth of the records in the dataset (denoted as |w|).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, deeper models overfit the biases of the COCO dataset used for pretraining, so that the best performance when transferred to DensePose-Track is at 6 stacks.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 61,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use a moving multi-MNIST dataset to show limitations of AIR in detecting overlapping or partially occluded objects, and show how SQAIR overcomes them by leveraging temporal consistency of objects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Denote N T (\u1ef9 i ) as the collection of top-m nearest neighbors of a mapped embedding vector\u1ef9 i in the target language.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These queries are double-checked to make sure that they are not in the CLQS training corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 91,
+                            "text": "CLQS training corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows that applying the same method to the SUN 09 dataset (that contains more objects and so intuitively has more contextual information ) results in a similar pattern of results (orange curve), albeit with greater variance (i.e., larger improvements but also greater diminished results).",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 57,
+                            "text": "SUN 09 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The full model includes the written character class as a random variable and can thus be trained on multi-character data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As example, we show the topics generated by GRLSI and GNMF in the setting of (K s = 20, K c = 8(10), \u03bb 1 = 0.01, \u03bb 2 = 0.1) for both Wikipedia and Web-I.  and  present example topics randomly selected from the topics discovered by GRLSI and GNMF on Wikipedia and Web-I. For each of the datasets and each of the methods, 3 shared topics and 9 class-specific topics are presented.",
+                    "annotation_spans": [
+                        {
+                            "start": 133,
+                            "end": 142,
+                            "text": "Wikipedia",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 147,
+                            "end": 152,
+                            "text": "Web-I",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 249,
+                            "end": 258,
+                            "text": "Wikipedia",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 263,
+                            "end": 268,
+                            "text": "Web-I",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, for the recent TREC Terabyte track, using the GOV2 collection (25 million documents), the completeness of the relevance judgement is two orders of magnitude less than in the early TREC years.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 76,
+                            "text": "GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our system is related to approaches that extract textures from multi-view image collections  or multi-view video collections  or a single video .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All models perform almost similarly on the second Paper dataset (9 features and 13 outputs per sample), which could be due to the absence of a strong task or output structure in this data.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 63,
+                            "text": "Paper dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To estimate the lookup cost using Equation 4.10 we use the same 10,000 database objects as both sample queries (Q in Equation 4.10) and sample database objects (X in Equation 4.10).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, for the sake of more reliable comparisons with the baselines, we carry out two experiments by taking first 50 judged queries from MQ-07 and MQ-08 collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 141,
+                            "end": 168,
+                            "text": "MQ-07 and MQ-08 collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With the help of the proposed PartNet dataset, researchers can now work on this more challenging task with little overhead.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 45,
+                            "text": "PartNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We exhaustively test 51 representative black-white triggers in 3 \u00d7 3 size on the Ci-far10 dataset, and also random color triggers on Cifar10/100 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 97,
+                            "text": "Ci-far10 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 133,
+                            "end": 152,
+                            "text": "Cifar10/100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Because PCA was fairly slow for the Nova dataset (it had 17K features), we removed any words that appeared in fewer than 100 documents (i.e. 0.5% of all documents) before applying PCA.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 48,
+                            "text": "Nova dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We hypothesize that during the training, any unobserved item should not be ranked higher than any observed positive training items because 1) given the fact that the large amount of unseen items in the collection are negative, it is unlikely that an unobserved yet positive item is to be selected from a sampling procedure; and 2) even if it is a positive item, it should not be ranked higher than the known positive ones, as long as it is ranked higher than other unobserved items.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A 16 layer VGG model  is used for frame-level feature extraction in experiments on DiDeMo dataset following .",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 97,
+                            "text": "DiDeMo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The training set for the HMM system and for the MLP consisted of the 3990 sentences that make up the standard DARPA speaker-independent training set for the Resource Management task.",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 148,
+                            "text": "DARPA speaker-independent training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "results by creating a collection of computational negative controls from the original dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this category, N is the number of indexed documents in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is hard to evaluate the attribute recognition accuracy on the re-id dataset without ground-truth labels, while the advantage of L W BCE and L M M D can be indirectly proven by the quantitative re-id accuracy, as shown in  2.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since FCI, PC, and GES cannot deal with the dataset containing missing values, we delete the records containing any missing value and input the remaining complete dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GRU adds a gating mechanism to standard RNN  and has been found to have better performance on smaller datasets , which suits the case of cyberbullying detection since these datasets are hard to obtain.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As established in Sec.2.3, there are currently no publicly available test collections with ambiguous queries.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, when several latest large-scale FSL models  are evaluated on the large-scale ILSVRC2012/2010 (Im-Net) dataset , they all struggle to beat this forgotten baseline.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 117,
+                            "text": "ILSVRC2012/2010 (Im-Net) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We consider the problem of recognizing facial action units from real image data using the CK+ data set .",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 102,
+                            "text": "CK+ data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Conventionally, documents are presented via the wellknown Vector Space Model, which models a collection of documents by a term-document matrix X, with each entry x ij indicating the weight (typically by TFIDF) of the term t i in document d j .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Both the Outlook email application and the web email clients have standard email search capabilities, which includes: a search box at the top le corner of the interface, search results displayed in the middle of the screen  Activity Log Collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 216,
+                            "end": 247,
+                            "text": "screen  Activity Log Collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this part, we focus more specifically on our discovery procedure, show qualitative results on various datasets, and evaluate quantitatively for place recognition on the LTLL dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 172,
+                            "end": 184,
+                            "text": "LTLL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also incorporate the use of self-taught learning  in our annotation experiments by utilizing the Mirflickr dataset for dictionary learning.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 117,
+                            "text": "Mirflickr dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Intuitively, if the source datasets are correlated with the target dataset, the source significant subgraphs may contain information also useful in the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They can come from the target collection or from external sources and several sources can be combined .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset contains 9,848 videos taken from CHARADES, a multi-action dataset with 157 action categories .",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 53,
+                            "text": "CHARADES",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the BPMF model, we have used different numbers of latent features (in particular, 10, 20 and 50), although we only show the best results for each database, specifically, K = 10 for the NESARC and the wine databases, and K = 50 for the remainder.",
+                    "annotation_spans": [
+                        {
+                            "start": 189,
+                            "end": 218,
+                            "text": "NESARC and the wine databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To the best of our knowledge, we are the first to study methods of reducing the complexity of greedy by exploiting the inherent geometry of the datasetnamely, that it consists of pairwise comparisons.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the unbalanced NTCIR4 testing dataset, in contrast to other merging methods, ours also significantly outperforms the baseline; this consequence arises main because our method can overcome the problem of unbalanced distribution by means of FRank's ability of coordinating the proposed features.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 40,
+                            "text": "NTCIR4 testing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Kicking out wrong labels from large-scale FR datasets is still very expensive, although some cleaning approaches are proposed.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 53,
+                            "text": "FR datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Connectionist modelers claim that the same phenomena can be captured in a single-route model which learns simply by exposure to a representative dataset .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments using NIPS 0-12 dataset validates the effectiveness of our model for the transfer learning task over NIPS sections.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 39,
+                            "text": "NIPS 0-12 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ref #text  Collection Topic length Examining Disagreements on Graded Judgements Topic Effects Distance.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "City Street: We collected a multi-view video dataset of a busy city street using 5 synchronized cameras.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focus on the self-training approach , which involves creating an initial baseline model on fully labeled data and then using this model to estimate labels on a novel weakly-labeled or unlabeled dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results are presented for a collection of simulated datasets for which the correct cluster structure is known unambiguously.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our goal is to leverage the existing judgments for corpus Ds and create tuples of the same form for the target corpus",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 60,
+                            "text": "Ds",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Running at \u223c30 fps on a single GPU at single scale, it achieves highly competitive results on the PoseTrack benchmarks.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 118,
+                            "text": "PoseTrack benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We plot the objective value of four small datasets for varying k, z, and m. Sequential vs. Distributed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We randomly split the data set into two subsets of the same size",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Across virtually all test collections used in IR research, topics have a single interpretation, which is explicitly defined in the topic's description and/or narrative and implicitly defined in its relevance judgements.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present the overall learning algorithm in Section 4 and evaluate its performance on different datasets in Section 5.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Simulations with Gaussian datasets indicate the running times can be shortened by, on average, as much as 60%.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 34,
+                            "text": "Gaussian datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In  we increase the length of the time series in datasets MS-M and MS-J to study the convergence behavior of GLG.",
+                    "annotation_spans": [
+                        {
+                            "start": 58,
+                            "end": 62,
+                            "text": "MS-M",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 67,
+                            "end": 71,
+                            "text": "MS-J",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our experiments we use the following two real-world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, compared to the results using all the data (row 5), it is clear that IPM-reduced dataset generates the closest results to the complete dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in most real-life applications collected datasets may not cover all factors to discover causal relations of interest.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The number of sequences in the training data set is finite.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Examining performance for all the methods across the three datasets, the methods performed best on ACE, worst on LGL, and in the middle for CLUST.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 102,
+                            "text": "ACE",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 113,
+                            "end": 116,
+                            "text": "LGL",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 140,
+                            "end": 145,
+                            "text": "CLUST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe that visual clustering and contextual similarity both show good generalizability and our full model outperforms all the other methods on the testing set of both datasets, with 3.43% higher than framelevel MIL baseline in RoboWatch, proving our method has a good generalization ability.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Despite the high interest in this task and ongoing emergence of new datasets  and approaches , it remains a highly challenging problem.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Music datasets, MudRecS achieves a RMSE score of 0.72 and 0.80, respectively, which imply that, on the average, an incorrectly assigned rating by MudRecS is less than 1 star away from its correct assignment.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this experiment, we made use of the OHSUMED dataset  to test the performances of AdaRank.",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 54,
+                            "text": "OHSUMED dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Repository  provides code and models fine-tuned on the Charades dataset to extract I3D video features.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 71,
+                            "text": "Charades dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the canonical co-training method we repeat 50 times and in each iteration add the most predictable 1 positive sample and r negative samples into the training set where r depends on the number of negative/positive ratio of each data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we summarize statistics of datasets with three quantities: the dimensionality of data, the number of training and testing samples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the Epinions dataset 1 in our experiments.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 29,
+                            "text": "Epinions dataset 1",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, they are mostly based on the assumption that the classes are the same between both domains , which does not hold in the RE-ID context where the persons (classes) in the source dataset are completely different from the persons in the target dataset, rendering these UDA models inapplicable to the unsupervised RE-ID .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We omit scores on MNIST and CelebA as the scores on these two datasets are not widely reported, and different preprocessing (such as the center crop size of CelebA) can lead to numbers not directly comparable.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 34,
+                            "text": "MNIST and CelebA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 157,
+                            "end": 163,
+                            "text": "CelebA",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on DUC datasets show the proposed joint method achieves better performance than the manifold-ranking baselines and several popular methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 27,
+                            "text": "DUC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To provide snippets, a search engine must store a (simplified) version of the documents in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation on a VLN benchmark dataset shows that our RCM model significantly outperforms previous methods by 10% on SPL and achieves the new state-of-the-art performance.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 37,
+                            "text": "VLN benchmark dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On this dataset, we detect three different actions: horse-riding, running, and diving.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all datasets except HTTP, we can see most outliers are included in the CISO training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 28,
+                            "text": "HTTP",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 75,
+                            "end": 92,
+                            "text": "CISO training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The IR systems to be evaluated are used to run the topics (formulated as queries) against the document corpus to produce a ranked list of documents or run for each topic.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to the label definition in the SCUT-CTW1500 , we set n to 7 in the curved text detection experiments 4.5 and to 2 when dealing with text detection in such benchmarks  labeled with quadrangle annotations considering the dataset complexity.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since our main focus is on binary classification, for the two MNIST datasets we evaluate distinguishing 2's from any other digit, which we call MNIST-2 and MNIST8m-2.",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 76,
+                            "text": "MNIST datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 144,
+                            "end": 165,
+                            "text": "MNIST-2 and MNIST8m-2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MixD data set contains five clusters that differ greatly in densities and sizes, including a huge cluster on the left generated by Gamma distribution, which has relatively low density, and four small clusters on the right respectively generated by normal distribution, Gaussian distribution, and chi-square distribution with relatively high density.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "MixD data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "sd's in the training dataset and estimate u's curiosity distribution as described in Sectionrefsubsec:est.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "FCT has the advantage of higher grounding accuracy on the vocabulary but sacrifices the generalizability to other datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Even if the image collection does not contain any labels, it has been shown to help tasks such as image completion and exploration , image colorization , and 3D surface layout estimation .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, an itemset which is not closed in a partition may be closed when considering the whole dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Other datasets also statistically exhibit similar phenomena.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe that the accuracy of our defense degrades rapidly as the overlap between the attack and the defense database grows.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the CIFAR-10 and CIFAR-100 datasets, dasNet outperforms the previous state-of-the-art model on unaugmented datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 38,
+                            "text": "CIFAR-10 and CIFAR-100 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the TV data set, decreasing kernel width h improved classification accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 19,
+                            "text": "TV data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report the measures on the 30 SRs in the test set of CLEF17",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Google Landmark Retrieval Challenge dataset consists of 1, 093, 647 database images and 116, 025 query images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 47,
+                            "text": "Google Landmark Retrieval Challenge dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Books dataset  contains m = 20 images of multiple books on a \"L\" shaped study table , and suffers geometrical ambiguities similar to the above with severe occlusion.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "Books dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments are conducted using 2.2k queries, over various numbers of shards and replicas for the large GOV2 collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 119,
+                            "text": "GOV2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On both dataset, N-grams achieves the best performance in all metrics in terms of a single feature.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In case of lexicon based recognition for IAM dataset, we use all the unique words present in the dataset, whereas we use lexicon provided in ICDAR 2011 competition for RIMES dataset and the lexicons provided with the original dataset are used for IndBAN and IndDEV datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 52,
+                            "text": "IAM dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 168,
+                            "end": 181,
+                            "text": "RIMES dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 247,
+                            "end": 273,
+                            "text": "IndBAN and IndDEV datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the improvement is slight on the MNIST dataset, and the performance even drops on more complicated datasets such as FMNIST and CIFAR-10.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 55,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 125,
+                            "end": 144,
+                            "text": "FMNIST and CIFAR-10",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Automaton for the yeast data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 32,
+                            "text": "yeast data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generate runs on the full collection and on subsets defined by taking the top n documents returned by a standard document retrieval run when using the topic as query.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test our model both qualitatively and quantitatively on the AVSpeech dataset  and the VoxCeleb dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 79,
+                            "text": "AVSpeech dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 105,
+                            "text": "VoxCeleb dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use four data sets to evaluate the CPM: (1) an MNIST dataset consisting of labeled handwritten digits encoded in 28 \u00d7 28 gray scale pictures  (60,000 training and 10,000 testing instances); (2) an MNIST8m dataset consisting of 8,100,000 pictures obtained by applying various random deformations to MNIST training instances MNIST ; (3) a URL dataset  used for malicious URL detection  (1.1 million training and 1.1 million testing instances in a very large dimensional space of more than 2.3 million features); and (4) the RCV1-bin dataset  corresponding to a binary classification task (separating corporate and economics categories from government and markets categories ) defined over the RCV1 dataset of news articles  training and 677,399 testing instances).",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 63,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 200,
+                            "end": 215,
+                            "text": "MNIST8m dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 525,
+                            "end": 541,
+                            "text": "RCV1-bin dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 694,
+                            "end": 706,
+                            "text": "RCV1 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 340,
+                            "end": 351,
+                            "text": "URL dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The last disatvantage of this approach is that in order to perform the mergin efficiently, each local collection of closed itemsets should be stored in main memory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the semi-supervised GANs, we follow the standard settings in  and train our GANs models on both datasets by utilizing Adam optimizer with a mini-batch size of 128.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Hopkins155 database consists of 156 video sequences (hence 156 subspace clustering tasks), with 2 or 3 motions in each video.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "Hopkins155 database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although this makes the job of WSDDN much easier, it will miss a lot of objects in the dataset since the proposals have low recall.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A convenient API allows for implementing reproducible experiments based on retrieving documents from the ClueWeb09 corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 121,
+                            "text": "ClueWeb09 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "an extreme case, all the documents need to be ordered when n equals the number of documents in the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the evaluation, we use SIFT  descriptors throughout and use a single 6K-word visual vocabulary, pre-computed using the Dubrovnik dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 140,
+                            "text": "Dubrovnik dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to avoid degenerate solutions, we incorporate the notion of class balance: we prefer configurations in which category labels are assigned evenly across the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We divide the TREC and MSN datasets into training and test sets evenly.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 35,
+                            "text": "TREC and MSN datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ", N is the frequency of total terms in the data set, and Count(wi)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the widely used Multi30K dataset, the experimental results of our approach are significantly better than those of the text-only UNMT on the 2016 test dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 35,
+                            "text": "Multi30K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the 20 Newsgroups dataset, the accuracies of the latent SMM, MedLDA and word2vec+SMM were proximate and better than those of SVD+SMM and SVMs.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 29,
+                            "text": "20 Newsgroups dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Previous approaches on multi-label learning with incomplete label assignments  are mainly designed for small/moderatesized datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There is a danger, however, that the true worth of these methods has not been fully realized by the research community because there are no publicly available test collections that have ambiguous topics and a range of relevance judgments that cover more than one interpretation of such topics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To train our network, we collect a CBCT dataset from some patients before or after orthodontics.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 47,
+                            "text": "CBCT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe that the proposed method can improve the clustering performance whether in digital datasets or in other product dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The fundamental difference between LOD and standard entity datasets like, for instance, Wikipedia, lies in the inherent structure of the data.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 38,
+                            "text": "LOD",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The other data set is a subset of the RCV1-v2 text data set, provided by Reuters and corrected by Lewis et al. .",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 59,
+                            "text": "RCV1-v2 text data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare AS-GCN on skeleton-based action recognition tasks with the state-of-the-art methods on the data sets of NTU-RGB+D and Kinetics.",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 124,
+                            "text": "NTU-RGB+D",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 129,
+                            "end": 137,
+                            "text": "Kinetics",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Weizmann dataset: The Weizmann human action dataset contains 83 video sequences showing nine different people, each performing nine different actions: running, walking, jumpingjack, jumping-forward-on-two-legs,jumping-in-place-on-two-legs, galloping-sideways, wavingtwo-hands, waving-one-hand, bending.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 16,
+                            "text": "Weizmann dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 22,
+                            "end": 51,
+                            "text": "Weizmann human action dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the closed itemsets extracted from P partitions of the original dataset D, then the following surely holds:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our second dataset (\"dataset 2\") is extracted from the Bitterlemons corpus, which is a collection of 594 opinionated articles about the Israel-Palestine conflict.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 74,
+                            "text": "Bitterlemons corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the results on the test-dev or test-standard splits, both train and val splits are used for training, and a subset of VQA samples from Visual Genome  is also used as the augmented dataset to facilitate training.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The adversary has full knowledge of our modeling choice, meaning that they know the form of (6), and seeks to perturb the data to maximally increase our loss on the test set, to which they also have access.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply k-means discretization on the training data set to get the discretization threshold, and transform the training data set into discrete sequences.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, it was decided to use this collection to study ambiguity of Proper nouns.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset consists of 6 images of the same 38 points.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in order to ensure valid testing, we removed near duplicates of Caltech256 images from the source training set by a human-supervised process.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example,  presents two drug data from different graph datasets in the National Cancer Institute (NCI) database.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 114,
+                            "text": "National Cancer Institute (NCI) database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "is result shows our proposed models are easy to be scaled up over larger datasets without harming the performance and computational cost.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, using OpenPose on Kinetics dataset results in 17M skeletons with at least 10 joints, but only 9M complete skeletons (14 joints).",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 47,
+                            "text": "Kinetics dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "[1] data set to evaluate IDC on real, labeled data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the real Mammals presence and DNA Amplification databases.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 64,
+                            "text": "Mammals presence and DNA Amplification databases",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test collections must include many topics because individual topic results are highly variable .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, we use the 64115 images in the 2017 COCO training set that have a person annotation for training.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 67,
+                            "text": "2017 COCO training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "reports the averaged recognition accuracies on the target domains, using either the original datasets/domains or the identified domains as the source domains.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The evaluation is based on three datasets which have been used for summarization evaluations: a sample of texts from the Brown corpus (30 documents), a set of full articles from JAIR (141 documents), and a random set of Wikipedia articles (512 documents).",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 133,
+                            "text": "Brown corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 178,
+                            "end": 182,
+                            "text": "JAIR",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 220,
+                            "end": 238,
+                            "text": "Wikipedia articles",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As future work, besides testing our features on different time-aware document collections, we aim at adopting machine learning techniques to combine the proposed features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The semantic score is based on a Canonical Correlation Analysis (CCA) retrieval model  which brings the image into the scoring loop by training on the combination of COCO  and SBU  (\u223c1M images), ensuring a larger exposure of the score to diverse visual scenes and captions, and lowering the COCO dataset bias.",
+                    "annotation_spans": [
+                        {
+                            "start": 291,
+                            "end": 303,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the MNIST and SMPL datasets, the generative factors are discrete instead of continuous, so we change the standard linear regression network to a kNN-classification algorithm.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 31,
+                            "text": "MNIST and SMPL datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used a 2.7 GB dataset with points in a 3dimensional space for evaluating our implementation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Session Track dataset consists of 60 di erent topics.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "Session Track dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After convergence, we aggregate the heatmaps generated on test dataset and then visualize the results in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note we cannot evaluate this quantity on the exchange dataset, as we do not know the true distribution.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 61,
+                            "text": "exchange dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the largest currently available dataset, the Million Song Dataset (MSD) is a collection of audio features and metadata for a million contemporary popular music tracks.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 74,
+                            "text": "Million Song Dataset (MSD)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We believe that this collection can be useful for further research on detecting sentiment change triggers, sentiment analysis and sentiment prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We illustrate each dataset (a, b, c, d) with an image sample, see text for definitions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data is from the CoNLL-2000 shared task  , in which 8926 English sentences from the Wall Street Journal corpus are used for training and 2012 sentences are for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 114,
+                            "text": "Wall Street Journal corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, we evaluate the performance of POLYGLOT-NER on CONLL datasets to demonstrate the effectiveness of the proposed solutions to deal with missing links in the Wikipedia markup.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 74,
+                            "text": "CONLL datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An additional  The document collection created in the competition is too small to effectively learn ranking functions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On CUB, AWA2, and APY datasets, the proposed methods without calibrated stacking (CS) in H are comparable to/better than current state-of-the-art methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 30,
+                            "text": "CUB, AWA2, and APY datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In fact, we show that for sparse binary data, scoring all items in a large data set can be accomplished using a single sparse matrix-vector multiplication.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results for the CiteSeer dataset are presented in .",
+                    "annotation_spans": [
+                        {
+                            "start": 20,
+                            "end": 36,
+                            "text": "CiteSeer dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, we obtained a total of 7,200 (= 300 \u00d7 24) time-travel queries for the UKGOV dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 89,
+                            "text": "UKGOV dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As an alternative, object detector  trained on the Visual Genome dataset  is employed to extract pretrained visual features .",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 72,
+                            "text": "Visual Genome dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On two complex Bayesian models and several large data sets, we found that population variational Bayes usually performs better than existing approaches to streaming inference.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe similar performance in the remaining datasets, shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A total of 21213 entitydocument-topic judgements were obtained in the collection 2 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first \"project\" the image on to the image manifold by finding the nearest neighbors in the image database, followed by a weighted combination of the predictions of this nearest neighbor set to produce our final prediction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the 3 larger datasets, we used a mini-batch size of 100 and 2 Monte-Carlo samples during training.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We treat word pairs in the following collection D * as highconfidence predictions to be added to the training set:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "that h should be increasing with the term frequency, and decreasing with the document length and the document/collection frequency.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our single-view model already outperforms the other singleview models,demonstrating the benefit of the MC dataset for training.",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 113,
+                            "text": "MC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this subsection we use the Million Song Dataset (MSD) , which consists of 1,129,318 users (|X |), 386,133 songs (|Y|), and 49,824,519 records (|\u2126|) of a user x playing a song y in the training dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 56,
+                            "text": "Million Song Dataset (MSD)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The termtopic matrix U uncovers the latent topic structure of the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "> tags from the HTML data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 29,
+                            "text": "HTML data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The GQA dataset centers around real-world reasoning, scene understanding and compositional question answering.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 15,
+                            "text": "GQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate, through analysis of two sample data sets, the power of the proposed analysis and visualization tools for increasing the amount and quality of information about event-related brain dynamics that can be derived from single-trial EEG data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our anomaly score is derived from the statistical p-value to measure how extremely ranked a collection is, which captures the following principles -A collection is more anomalous if: (I) it contains a larger number of entities that are ranked at more extreme positions on some feature; and (II) it contains entities that are consistently ranked at extreme positions across more features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ese test collections are then used later by researchers to evaluate their systems.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Comparisons of the performance (%) on different datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second dataset contains instances of 5 different indoor activities (e.g. computer usage, meal, meeting, watching TV and sleeping) from a single user.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "GP modelling is a powerful non-parametric framework for performing classication and regression, however, exact GPs are typically restricted to small datasets since they require OpN 3 q time and OpN 2 q storage where N is the number of training points .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A collection spanning 9 days was used (882 images overall), all images, good and bad were considered, and no image adjustments were performed.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the RobotCar dataset, it performs similarly to other methods on the dusk sequence, where the accuracy tends to saturate.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 23,
+                            "text": "RobotCar dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we left 5 subjects out from the training datasets to assess the performances of the algorithm on unseen people.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When the pool size is small, there are more noises in CISO training set, and its performance is a little lower than BCISO.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 71,
+                            "text": "CISO training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It uses pseudo-test collections  to learn to fuse ten well-established retrieval algorithms and implements a number of query, tweet, and query-tweet features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At training time, images are excluded from the MSCOCO training set if their caption labels contain the novel objects.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 66,
+                            "text": "MSCOCO training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The real world datasets include are taken from the UCI Machine Learning  and the 10X Genomics repository .",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 104,
+                            "text": "UCI Machine Learning  and the 10X Genomics repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The last dataset is the \"MovieLens\" dataset, which contains ratings for 1,682 movies by 943 users on a five-point scale.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 43,
+                            "text": "\"MovieLens\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "into Eqn. , we can obtain the relevance scores for all questions in the question corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 87,
+                            "text": "question corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Despite this trend, standard recognition benchmarks like ImageNet , CIFAR-10, and CIFAR-100  heavily curate their data to ensure that classes remain nicely balanced and easily separable.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 91,
+                            "text": "CIFAR-100",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 57,
+                            "end": 65,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 68,
+                            "end": 76,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To guard against the possibility that these results are a by-product of the fact that English has basic word order SVO, we repeat the method discussed above using utterances involving singly transitive verbs taken from the \"Asato\", \"Nanami\" and \"Tomito\" transcripts in the MiiPro corpus of the CHILDES database, which is in Japanese (basic order SOV).",
+                    "annotation_spans": [
+                        {
+                            "start": 294,
+                            "end": 310,
+                            "text": "CHILDES database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 273,
+                            "end": 286,
+                            "text": "MiiPro corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have illustrated the results for 3-dimensional range queries in 100-and 200-dimensional data sets respectively in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following the experimental strategies used in , for each user, we sample N ratings for training, and sample 10 ratings from training set to tune hyper parameters (validation set).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To this end, we collect 40 human images in which 29 are from the internet whose alpha mattes are carefully matted by designers and 11 are from composition-1k testing dataset in  due to their abundant details.",
+                    "annotation_spans": [
+                        {
+                            "start": 143,
+                            "end": 173,
+                            "text": "composition-1k testing dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Some of the bounds provide interesting justifications for previously used methods, and we show improved performance over random label selection and baseline submodular maximization methods on several real world data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unfortunately, computing an SVD of a very large dataset is an impractical affair, requiring complete data, run-time quadratic in the dataset size, and in-memory storage of the entire dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this experiment, we evaluated the efficiency improvement of GRLSI and GNMF over RLSI and NMF on the Wikipedia dataset and the Web-I dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 103,
+                            "end": 120,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 129,
+                            "end": 142,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ILSVRC 2012 classification dataset  consists of about 1.2 million images for training, and 50, 000 for validation, from 1, 000 classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 38,
+                            "text": "ILSVRC 2012 classification dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "2000 training examples as a validation set to choose the margin penalty parameter; af this parameter by cross-validation, we then retrained each SVM using all the training ex reference, we also report the best results obtained previously from three layer deep belief 3) and SVMs with RBF kernels (SVM-RBF).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On both datasets, TT, IF, and FS perform comparably and RFS performs best.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our first data set is a subset of Newsgroup corpus, which contains 3970 documents with 8014 dimensional TFIDF features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dimensions of the layers were 100-100-2 for Augmented MNIST dataset and 2048-2048-2 for the rest of the datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 71,
+                            "text": "Augmented MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To be specific, we collect 1270 prediction results of each dataset, and ask 40 people to provide subjective assessment on them.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experimental results on TREC and DBLP data sets suggest that the proposed method is very effective in obtaining relevant results to the querying inputs.",
+                    "annotation_spans": [
+                        {
+                            "start": 34,
+                            "end": 57,
+                            "text": "TREC and DBLP data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The MSN-1 dataset consists of 31,351 queries and 136 features extracted from 3,771,125 query-document pairs, while the Istella dataset is composed of 33,018 queries and 220 features extracted from 3,408,630 query-document pairs.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "MSN-1 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 134,
+                            "text": "Istella dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, rotation and elongation being a dominant variation in the dataset while localized changes being less dominant among characters is reflected by the variance of the activity vector.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We find that the causal accuracies and F1 scores of both datasets are similar and the algorithms in the table cannot recover most edges of the ground-truth causal graph.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "proposed techniques to disambiguating collections of Web appearances using Agglomerative and Conglomerative Double Clustering.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct a comprehensive set of experiments on three real-world datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To increase the challenges and difficulties in the RAVEN dataset, we further append 2 types of noise attributes-Uniformity and Orientation-to Layout and Entity, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 64,
+                            "text": "RAVEN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare the marginal probability distributions of the same variables in the real dataset and the simulated dataset as shown in  and .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this experiment, we addressed the task of text classification using 20newsgroups dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 91,
+                            "text": "20newsgroups dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Details of the TREC benchmarks are provided in .",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 30,
+                            "text": "TREC benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on three building instance segmentation datasets demonstrate our DAR-Net achieves either state-of-the-art or comparable performances to other competitors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here we used 10 \u00d7 10 pixel patches, with a spacing of respectively 2 and 5 pixels for the Sowerby and Corel datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 116,
+                            "text": "Sowerby and Corel datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The need for a methodology that would (a) be computationally tractable for massive training data sets without pre-processing, and (b) jointly optimize the feature selection, segmentation and modeling steps, was the impetus for the development of IBM ATM-SE TM solution (Advanced Targeted Marketing -Single Events) for direct mail retailing .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, the ratings may be more reliably captured in this dataset compared to others.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , the DM-GAN improves the Rprecision by 4.49% on the CUB dataset and 3.09% on the COCO dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 76,
+                            "text": "CUB dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 94,
+                            "end": 106,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Is there a principled way to learn a probabilistic discriminative classifier from an unlabeled data set?",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "D A (see ), and D [B,D] (see ), where D [B,D] is the projected dataset obtained by merging D B , D C , and D D .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When running the queries on the smaller NTCIR-11 Wikipedia collection, Tangent-2 requires 8 minutes to execute the 100 test queries using a parallelized index with nine subindices on Amazon Web Services .",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 69,
+                            "text": "NTCIR-11 Wikipedia collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We outperform the state-ofthe-art on the challenging Occluded-LINEMOD and YCB-Video datasets, which is evidence that our approach deals well with multiple poorly-textured objects occluding each other.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 92,
+                            "text": "Occluded-LINEMOD and YCB-Video datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Such data sets grant unprecedented insight into the temporal and spatial structure of neural activity and will hopefully lead to an improved understanding of neural coding and computation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In all datasets, SCARGC produces accurate classifiers using a small amount of initial labeled data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each target effectiveness level, we generate one summary per topic within the three TREC-TS test collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 112,
+                            "text": "TREC-TS test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compute the intersectionover-union (IoU) score between the ground-truth target segmentation and the same centered bounding box used to select views during dataset generation (see Section 4.3).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From left to right the blocks are: (1) a convnet trained on STL-10 (2) an MLP trained on MNIST, (3) a convnet trained on CIFAR-10, (4) Reconstruction ICA trained on Hyv\u00e4rinen's natural image dataset (5) Reconstruction ICA trained on STL-10.",
+                    "annotation_spans": [
+                        {
+                            "start": 60,
+                            "end": 66,
+                            "text": "STL-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 89,
+                            "end": 94,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 121,
+                            "end": 129,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 165,
+                            "end": 198,
+                            "text": "Hyv\u00e4rinen's natural image dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 233,
+                            "end": 239,
+                            "text": "STL-10",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, NMF is believed to learn part-based representations of dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, although SF t u,i , SR t u,i and Dissim t u,i are defined based on the music recommendation dataset, it is not difficult to extend them to other domains such as restaurant recommendation and movie recommenda-",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in  and , accurate results were achieved in the CASIA-SURF dataset for traditional metrics, e.g. APCER=3.8%, NPCER=1.0%, ACER=2.4%.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 75,
+                            "text": "CASIA-SURF dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One important difference between the data sets is that the typical distance to the nearest neighbor is smaller in the SIFT data set, which can make the nearest neighbor problem easier (see Appendix D).",
+                    "annotation_spans": [
+                        {
+                            "start": 118,
+                            "end": 131,
+                            "text": "SIFT data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows some examples of the evolutionary community paths detected in Enron email dataset from Jun. 1, 2001 to Aug. .",
+                    "annotation_spans": [
+                        {
+                            "start": 68,
+                            "end": 87,
+                            "text": "Enron email dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This processing step may include filtration of samples with unreliable labels, (1) Source dataset processing, P \u2190 our focus (2) Background learning to rank algorithm, A (3) Evaluation of the ranker on the target dataset (4) Feedback for step (1) optimization \u2190 our focus aggregation of labels based on majority voting among workers, reweighting of samples, etc.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We propose an effective deep learning approach to aesthetics quality assessment that relies on a new type of pretrained features, and apply it to the AVA data set, the currently largest aesthetics database.",
+                    "annotation_spans": [
+                        {
+                            "start": 150,
+                            "end": 162,
+                            "text": "AVA data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to analyze the identity consistency, we measure the feature distance between the generated paired images on the CASIA NIR-VIS 2.0 database.",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 147,
+                            "text": "CASIA NIR-VIS 2.0 database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "; see http://www.siam.org/journals/ojsa.php Inputs: E:a set of spatial event types ST :a spatio-temporal dataset <event type, instance",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As an inspiring example to our work in this paper, the ImageNet dataset  contains annotated images from 21,841 classes in total.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 71,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, performance is better in IndBAN and IndDEV dataset than other two datasets inspite of our claim of having more complexity in Bangla and Devanagari script.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 56,
+                            "text": "IndBAN and IndDEV dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on CIFAR-10 , ImageNet , and ImageNet-10, where we manually choose 10 coarse-grained classes from the whole dataset, e.g. bird, car, cat, etc.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 34,
+                            "text": "CIFAR-10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 37,
+                            "end": 45,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 52,
+                            "end": 63,
+                            "text": "ImageNet-10",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, in TREC collections longer documents are more likely to be relevant than shorter documents .",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 32,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can observe that  : Performance Comparison on Different Users the user-item rating matrix of Epinions is very sparse, since the densities for the two most famous collaborative filtering datasets Movielens (6,040 users, 3,900 movies and 1,000,209 ratings) and Eachmovie (74,424 users, 1,648 movies and 2,811,983 ratings) are 4.25% and 2.29%, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 198,
+                            "end": 207,
+                            "text": "Movielens",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 262,
+                            "end": 271,
+                            "text": "Eachmovie",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, our experimental results on KITTI , Driving  datasets show that our method is effective to solve this new problem.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 42,
+                            "text": "KITTI",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 45,
+                            "end": 62,
+                            "text": "Driving  datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Common setup: For all methods, we used one-hidden-layer neural networks with the rectified linear units (ReLU) as activation functions, where the number of hidden units is 3 for synthetic datasets, and 50 for benchmark datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Incorporating Features: Both matrix factorization and tensor factorization discussed above do not directly incorporate explicit features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments were conducted with two publicly available datasets: the MSN-1 1 (Fold 1) dataset and a new dataset provided by Istella 2",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 93,
+                            "text": "MSN-1 1 (Fold 1) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The performance of object detectors has been dramatically improved thanks to the advance of deep convolutional neural networks  and well-annotated datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate classification performance with accuracy averaged over 10 random training/testing splits with the exception of the CIFAR10 dataset, where we report the accuracy on the test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 127,
+                            "end": 142,
+                            "text": "CIFAR10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the execution time for each iteration while factoring the synthetic dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the former, we divide each dataset into three object subsets based on the number of tags available in the object, so that each subset contains roughly the same number of objects, as shown in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, we report results from weakly-supervised models whose code is available or whose results on Charades, HICO-DET or EPIC KITCHENS datasets is readily available.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 142,
+                            "text": "Charades, HICO-DET or EPIC KITCHENS datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With such datasets, we can evaluate how unmeasured confounding influences the results of causal discovery algorithms and hopefully develop new and better algorithms to address this issue.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, we compare Automatic Frankensteining on a large scale data set to more than 3,500 machine learning expert teams and are able to outperform more than 3,000 of them within 12 CPU hours.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We combine both these sources in a single model: where P K B is based on the knowledge base and P T C is the estimate based on the table corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 131,
+                            "end": 143,
+                            "text": "table corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset allows us to compare our method against ground truth which was obtained using high-standard GPS/IMU sensors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the non-sparse datasets, Adagrad performs similarly to the Newton-type method (often better in early iterations and worse later), except for the alpha dataset where the step-size is harder to tune (the best step-size tends to have early iterations that make the cost go up significantly).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As an example,  contains two different views of a dataset, an image view and a text view.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The data set that we use as our target social media collection is the Blogs08 collection provided by TREC; the collection consists of a crawl of feeds, permalinks, and homepages of 1.3M blogs during early 2008-early 2009.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 88,
+                            "text": "Blogs08 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Besides the two reasons (huge intra-class visual diversities and huge inter-class visual similarities) which have discussed above, another key reason for this phenomenon in ImageNet1K dataset is that: (a) Some hard object classes  are from the leaf nodes of the concept ontology with longer depths, there may have multiple visually-similar object classes which are hard to be distinguished from each other, e.g., fine-grained hard object classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 173,
+                            "end": 191,
+                            "text": "ImageNet1K dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Flickr data set consists of 186,577 image-tag pairs pruned from the NUS data set 5  by keeping the pairs that belong to one of the 10 largest classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 19,
+                            "text": "Flickr data set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 72,
+                            "end": 84,
+                            "text": "NUS data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When comparing the performance of the FD baseline and its corresponding hypergraph H-FD, (c) demonstrates that the inclusion of the global factor results in an improved retrieval effectiveness (in terms of MAP) for all collections, and in statistically significant improvements for the Robust04 and Gov2 collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 286,
+                            "end": 315,
+                            "text": "Robust04 and Gov2 collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We do not use any service-or dataset-specific heuristics or features (like the rank of users) in our classification model; therefore our approach should be usable in any discussion board.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We will introduce COIN from the following aspects: the establishment of lexicon, a new developed toolbox for efficient annotation, and the statistics of our dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Some exemplar images are shown in , where each row corresponds to an original dataset, and each column is an identified domain across two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this experiment, we tested the effectiveness of GRLSI and GNMF by using the topics generated by them with the Web-I dataset in search relevance ranking on the Web-II dataset  .",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 176,
+                            "text": "Web-II dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 113,
+                            "end": 126,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a set of POIs L = {l 1 , . . . , l n }, we denote the collections of their associated photographs by I = {i 1 , . .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the literature there are two main evaluation protocols on the Human3.6M dataset using subjects 1, 5, 6, 7, 8 for training and subject 9, 11 for testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 82,
+                            "text": "Human3.6M dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tried various mini-batch sizes from 1 to 16,384, using the best learning parameters for each mini-batch size found in the previous study of the Nature corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 147,
+                            "end": 160,
+                            "text": "Nature corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The new method produced good results on the Accident dataset investigated here, as well as in simulations.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 60,
+                            "text": "Accident dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since, in Terabyte track, the size of the document collection is very large, the systems may continue retrieving relevant documents even at high ranks (deeper in the list).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Along with our LFM algorithm, we introduce Camera-Display 1M, a dataset of 1,000,000 image pairs generated with 25 camera-display pairs.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 60,
+                            "text": "Camera-Display 1M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that if the solution is not unique, the possible values of b fall on an interval of the real line: in this case a suitable choice would be one that minimizes an estimate of the Bayes error, where the SVM output densities are modeled using a validation set 6 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It aims at maximizing the likelihood that the selected subgraphs are significant in the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , for S2-CS dataset, SetRank's ranking performance first increases as \u03bb E increases until it reaches 0.7 and then starts to decrease when we further increase \u03bb E .",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 31,
+                            "text": "S2-CS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For all experiments on LFW , we train ResNet-50 models  with batch size of 512 on the cleaned WebFace  dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 94,
+                            "end": 110,
+                            "text": "WebFace  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Indeed, LR is much faster than SVM: in their thresholded cost-sensitive versions, the timings for LR on News20 and Siam datasets are 6,400 and 8,100 seconds, versus 255,000 and 147,000 seconds for SVM respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 104,
+                            "end": 128,
+                            "text": "News20 and Siam datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, SIS could not scale to Yale Faces and other gene expression datasets due to the low scalability of its optimization.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 42,
+                            "text": "Yale Faces",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Additionally, PNRL-R performs better than PNRL-C on Email, PowerGrid and CondMat datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 89,
+                            "text": "Email, PowerGrid and CondMat datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "99.7 Context Aware (ours) 103.3 118.6 : CIDEr performance of captioning systems given various attention mechanisms, Att2All , sentinel attention  and Context Aware attention on COCO validation set.",
+                    "annotation_spans": [
+                        {
+                            "start": 177,
+                            "end": 196,
+                            "text": "COCO validation set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second set of experiments considers the task of discriminating between handwritten characters of 5 and 6 from the MNIST dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 118,
+                            "end": 131,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In principle we could evaluate the support of a rooted tree from scratch by means of known algorithms for graph homomorphism , or by means of SQL database queries .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we use one of the largest tagging datasets available, the Stanford Tag Crawl 2007 dataset based on the del.icio.us social bookmarking site.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 104,
+                            "text": "Stanford Tag Crawl 2007 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To form the gallery set for the WL-DukeMTMC-REID dataset, we first randomly split the raw video into short video clips, with each clip comprising between 20 and 120 raw frames.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 56,
+                            "text": "WL-DukeMTMC-REID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The OTB2015  dataset is one of the most popular tracking benchmarks which consists of 100 challenging image sequences with 11 different attributes, such as illumination variation (IV), scale variation (SV), occlusion (OCC), deformation (DEF), motion blur (MB), fast motion (FM), in-plane Rotation (IPR), out-of-plane rotation (OPR), out-of-view (OV), background clutters (BC) and low resolution (LR).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "OTB2015  dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To populate the Cognos expertise database, we started to crawl all the Lists containing all Twitter users.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 41,
+                            "text": "Cognos expertise database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar results can also been found on the Graded TD2003 datasets as shown in the upper part of .",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 65,
+                            "text": "Graded TD2003 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "for data sets, such as aPY, CUB, SUN, AWA1 and AWA2.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 26,
+                            "text": "aPY",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 28,
+                            "end": 31,
+                            "text": "CUB",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 33,
+                            "end": 36,
+                            "text": "SUN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 38,
+                            "end": 42,
+                            "text": "AWA1",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 47,
+                            "end": 51,
+                            "text": "AWA2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use a similar setup in the second set of experiments, now using linear SVM instead of regression (we run only on the classification datasets).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows an experimental evaluation of the learning rate with this thresholding rule, using an artificial 2D dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These models draw their inspiration from a long-standing hypothesis in IR, namely the fact that the difference in the behaviors of a word at the document and collection levels brings information on the significance of the word for the document.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the training images from the Middlebury dataset and only show the time consumed by DTS for the stereo task at 3000 iterations.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 54,
+                            "text": "Middlebury dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In MNIST,    we use 20 exemplars from each of 10 digits from the MNIST data set, reduced via PCA to 20 dimensions, repeating the experiment 50 times.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 79,
+                            "text": "MNIST data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "show the results over ROUGE recall and Fmeasure scores on the DUC 2006 dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 78,
+                            "text": "DUC 2006 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We specify our research questions, describe our data set, and detail our ground truth.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "train on MallScape-B and test on MallScape-A Since the styles of the two venues are significantly different, these splits also ensures that good performance is not due to training set overfitting.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 20,
+                            "text": "MallScape-B",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 33,
+                            "end": 44,
+                            "text": "MallScape-A",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our algorithm on three datasets: retrieving movies from EachMovie, finding completions of author sets from the NIPS dataset, and finding completions of sets of words appearing in the Grolier encyclopedia.",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 135,
+                            "text": "NIPS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The real data set with name < RN ame > and perturbation level u was denoted by < RN ame >.P .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collections Lu on each node used by  to  are not disjoint across nodes, but because a PAC architecture distributes documents uniformly randomly, global statistics are, on average, unaffected.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It jointly learns the embedding spaces of two modalities: vision and language, and aligns them using parallel image-text pairs (e.g., image and captions from the MS-COCO dataset ).",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 177,
+                            "text": "MS-COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We validate our approach on the Saliency dataset  and a more challenging dataset newly collected by us, namely Object Extraction(OE) dataset 1 .",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 48,
+                            "text": "Saliency dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 111,
+                            "end": 140,
+                            "text": "Object Extraction(OE) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Movie genre is used as the equivalent of query aspects in MovieLens, and social tags (assigned to artists by Last.fm users) are used for the same purpose in the music dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, the GP based method requires, on average, 63.5, 50.4 and 30.6 minutes to train on the LastFM, YouTube and YahooVideo datasets, respectively, running on an Intel 2.83GHz Quad-Core with 8GB RAM.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 139,
+                            "text": "LastFM, YouTube and YahooVideo datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We view the segmentation problem as a collection of separate pixel labeling problems, and directly use knowledge distillation to align the class probability of each pixel produced from the compact network.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To create this dataset, we produced unique scene labels for both the FLIC test set and FLICplus training sets using Amazon Mechanical Turk.",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 109,
+                            "text": "FLIC test set and FLICplus training sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The zoo data set is used for this task because it also has a class label assigned to each instance.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 16,
+                            "text": "zoo data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Scenario: To understand why adversaries and user risks are different from the privacy concerns for structured databases, consider the following scenario.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As compared with BodyNet, a volumetric based prediction method, we have got comparable scores in 3D error on RECON dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 122,
+                            "text": "RECON dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The face images in the AT&T database were taken under well controlled lighting conditions whereas the images in the Yale database were acquired under varying lighting conditions.",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 36,
+                            "text": "AT&T database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 116,
+                            "end": 129,
+                            "text": "Yale database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ProbE is an object-oriented framework specifically designed for segmented predictive modeling with massive training data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The TREC 7 and 8 datasets are used in this study.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "TREC 7 and 8 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Classification errors (%) of compared methods on SVHN and CIFAR-10 datasets without data augmentation.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 75,
+                            "text": "SVHN and CIFAR-10 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These principles allow Tiled CNNs to achieve competitive results on the NORB and CIFAR-10 object recognition datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 72,
+                            "end": 117,
+                            "text": "NORB and CIFAR-10 object recognition datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In cases where offline clustering on complete document collections is infeasible due to resource and response-rate constraints, online unsupervised clustering methods that process incoming data incrementally are necessary.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We can see from  that risk aversion applies to all five collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first introduce the data used in our experiments as well as our proposed annotated dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also assume access to a collection of synthesized piano sounds for parameter initialization, which we take from the MIDI portion of the MAPS corpus, and a large collection of symbolic music data from the IMSLP library , used to estimate the event parameters in our model.",
+                    "annotation_spans": [
+                        {
+                            "start": 139,
+                            "end": 150,
+                            "text": "MAPS corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Specifically, our dataset consists of about 10 years worth of trading data from January 2000 to March 2011.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Last.fm music dataset is very large, and we take the subset from the last year; for the ml-20m dataset, we take the subset from year 2005 to 2015.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 25,
+                            "text": "Last.fm music dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 106,
+                            "text": "ml-20m dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we show the impact of the task loss weights on AANet accuracy performance using DukeMTMC-reID dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 106,
+                            "text": "DukeMTMC-reID dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We capture a new 3D human motion dataset with 40 subjects as training and evaluation data for monocular total motion capture.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Automatic Frankensteining finds for 74 data sets a stronger model and only in 3 cases",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Many existing strategies exploit term co-occurrence patterns in previous tag assignments in the collection, expanding an initial tag set",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also evaluate whether other gaze datasets demonstrate subject-specific random effects.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each curve was obtained as an average over the results from 30 different datasets for the two large graphs (Alarm 2 and Hailfinder 2) and from 50 different datasets for all the other Bayesian networks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We illustrate our method on data from a large extract of the RCV1 corpus , processed and made freely available for multiview multilingual learning experiments .",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 72,
+                            "text": "RCV1 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example in TREC Web 2014, which uses the ClueWeb 2012 corpus, the average ERR@20 for topic 278",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 64,
+                            "text": "ClueWeb 2012 corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the CSS dataset is simple, it allows us to perform controlled experiments, with multi-word text queries, similar to the CLEVR dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 24,
+                            "text": "CSS dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 129,
+                            "end": 142,
+                            "text": "CLEVR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Resomce Management database have shown substantial advantages of the context-dependent MLPs over earlier cootextindependent MLPs.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 27,
+                            "text": "Resomce Management database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For smaller data sets the basic method works quite well, and indeed outperforms any enhancement based on using MWUMKL.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The LETOR dataset conveniently extracts many stateof-the-art features from documents, including BM25 , HITS , and Language Model .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "LETOR dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on six benchmarks, including the TotalText and CTW-1500 datasets which contain highly curved texts in natural images, demonstrate that our character-level text detection significantly outperforms the state-of-the-art detectors.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 86,
+                            "text": "TotalText and CTW-1500 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "collected these datasets by asking workers on Amazon Mechanical Turk to rank either four images by the number of dots they contain (Dots), or four states of an 8-puzzle by their distance to the goal state (Puzzle).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As with our earlier results, we see that the performance on the TD2004 dataset using UBM for generation is qualitatively different from the other conditions for the reasons mentioned earlier.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 78,
+                            "text": "TD2004 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, since \u03b4 c depends on the query class, i.e. depends on the query q, we are optimizing two di erent variants of the same quality measure nMCG across the training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For COCO image synthesis, since the training images (INIT dataset) and target images (COCO) are in different distributions, we keep the original size of our training image and crop 360\u00d7360 pixels to train our model, in order to learn more details of images and objects, meanwhile, ignore the global information.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 65,
+                            "text": "INIT dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From these results, comparing the Bayesian versions of MAXQ to standard MAXQ, we observe that for Taxi-World, the Bayesian version converges faster to the optimal policy even with the uninformed prior, while for Resource-collection, the convergence rates are similar.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in many real-world multi-label learning tasks, it is very hard or expensive to get a fully labeled dataset, especially when the number of classes and/or instances are very large.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each domain from the EuroGOV collection was individually indexed, using language-specific stopword lists, but we did not apply stemming.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 39,
+                            "text": "EuroGOV collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The object annotations for this dataset suffer from some   imprecisions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, experimental results on the NIST datasets show that the sparsity of solution found by the perceptron algorithm is consistently (and often by a factor of two) greater than that of the SVM solution (see   ] and ).",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 54,
+                            "text": "NIST datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, the aim is to utilize the significant subgraphs from related graph datasets to help label graphs of the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The optimization procedure typically converges after 10 epochs (an epoch is a single pass through the dataset).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "See  for a comparison of how these sky models approximate clear skies on a rich outdoor dataset provided by the authors.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In addition, the mixPLDS model leverages the temporal structure in recordings, automatically optimizing for the temporal smoothness level and revealing the main time-constants in the data (in the above data set 1.8 and 6.5 sec) as well as main oscillation frequencies (0.2 and 0.45Hz).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "reports the experimental results for TREC 2013 and 2014 datasets respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 37,
+                            "end": 64,
+                            "text": "TREC 2013 and 2014 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experimental results demonstrate that the proposed method significantly outperforms the original log-logistic feedback model in all collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar to the results of BMVC Text dataset, we find that conventional methods  fail to deblur the given text images.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 43,
+                            "text": "BMVC Text dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Hence, it is also instrumental to investigate whether we can reshape the test datasets into multiple domains to achieve better adaptation results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Some types of datasets used in data mining are solely expressed as similarity matrices among exemplars.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to understand the reason of this discrepancy in between the recall rate of different relations, we manually partitioned the 1,345 relations in FB15k test set into four categories, then perform tests on each of them with the HiRi algorithm, the results are depicted in .",
+                    "annotation_spans": [
+                        {
+                            "start": 152,
+                            "end": 162,
+                            "text": "FB15k test",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We illustrate the applicability of the proposed approach on the challenging and recently introduced VisDial datasets, outperforming recent state-of-the-art methods by 1.1% for VisDial0.9 and by 2% for VisDial1.0 on MRR.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 116,
+                            "text": "VisDial datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our final task, we use the Blog corpus of  which contains 681,288 blog posts from 19,320 authors.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 42,
+                            "text": "Blog corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The variable space is then used as a bridge to enable the knowledge transfer from related source datasets to the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Efficient evaluation of these time-travel queries over archives of large collections (such as the Web) is a challenge that we address in this paper.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each image, there are 12.74 objects in KINS on average compared with 6.93 of them in KITTI Detection dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 112,
+                            "text": "KITTI Detection dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on diverse challenging datasets demonstrate the effectiveness of proposed video prediction approach, i.e., yielding more diverse proposals and accurate prediction results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Intuitively, \u03b8 can be the latent variable from source datasets, and we can use p(\u03b8|T ) to assign high weights to the source datasets related to the target dataset, and use p(g|\u03b8) to assign high weights to the source significant subgraphs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As only relevant answer passages are annotated in this collection, we create non-relevant documents by using a sliding window of random size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the results in , we observe that these two datasets are almost equally challenging on this task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our case the Round0 data set consists of compounds from Vendor catalog and corporate collec- tions.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 31,
+                            "text": "Round0 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The precision estimates are taken from the TREC 2009/10 diversity task data for Lemur, and from the MovieLens 2 dataset for pLSA (more details in section 4.2).",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 55,
+                            "text": "TREC 2009/10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 100,
+                            "end": 119,
+                            "text": "MovieLens 2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Chiebukuro data set from that task contained only about 3.1 million resolved questions from 2004 and 2005, which is substantially smaller compared to the data used in our demonstration.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 19,
+                            "text": "Chiebukuro data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conducted experiments on Stanford Online Products dataset  for reconstructing real world images.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 60,
+                            "text": "Stanford Online Products dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "After collecting samples from 100 episodes of optimization on the GENKI dataset, LSPI is able to learn a policy that achieves a 2.66 fold greater reduction in total loss than the LMA on a test set of faces from the GENKI dataset (see ).",
+                    "annotation_spans": [
+                        {
+                            "start": 66,
+                            "end": 79,
+                            "text": "GENKI dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 215,
+                            "end": 228,
+                            "text": "GENKI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is because CovType data set is very unbalanced in which random sampling performs very poor as shown by the analysis.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 32,
+                            "text": "CovType data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our algorithm on TREC test collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 50,
+                            "text": "TREC test collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, based on commonly used base networks, our SymNets achieve the new state of the art on three benchmark domain adaptation datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated MKMs on the two multiclass data sets from previous benchmarks  that exhibited the largest performance gap between deep and shallow architectures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Aquaint TREC test collection of over one million newspaper articles was used .",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 32,
+                            "text": "Aquaint TREC test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use a snapshot of Wikipedia taken in March 2006 and include the most relevant article in the Interest Corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 111,
+                            "text": "Interest Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental comparison of the segmentation part IoU (%) with the recent methods on the ShapeNet Part dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 109,
+                            "text": "ShapeNet Part dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the log-likelihood for MCMC samples on the WBC dataset, for the Gibbs and control algorithms respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 54,
+                            "text": "WBC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on large real-world document collections, in both the offline and online settings, demonstrate that though LDA is a good model for finding word-level topics, vMF finds better document-level topic clusters more efficiently, which is often important in text mining applications.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in , only a small number of features (around 15%) that are frequently covered in 90% reviews, while most of features occur rarely in the whole datasets (i.e., Zipf's law).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally in column (c) the failure is most likely due the extreme difficulty of some examples on the CUHK-SYSU and PRW datasets, since several people look alike (illustrated) and some others have low-visibility issues.",
+                    "annotation_spans": [
+                        {
+                            "start": 100,
+                            "end": 126,
+                            "text": "CUHK-SYSU and PRW datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to the NMI values in , our ODC outperforms other clustering algorithms on five datasets: ORL, SRBCT, iris, yeast and image segmentation.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 102,
+                            "text": "ORL",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 111,
+                            "end": 115,
+                            "text": "iris",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 127,
+                            "end": 145,
+                            "text": "image segmentation",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 117,
+                            "end": 122,
+                            "text": "yeast",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 104,
+                            "end": 109,
+                            "text": "SRBCT",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Subdivide the original dataset into smaller datasets that can be separately processed entirely in main memory.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We believe this behavior dataset will encourage a lot of researches.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Different from traditional dataset (e.g., ICDAR 2015, ICDAR2017-MLT), the text instances in SCUT-CTW1500 are labelled by polygons with 14 vertices.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 52,
+                            "text": "ICDAR 2015",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 54,
+                            "end": 67,
+                            "text": "ICDAR2017-MLT",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 92,
+                            "end": 104,
+                            "text": "SCUT-CTW1500",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each data set consisted of 2,000 training points and 10,000 test points, distributed either uniformly or nonuniformly in the unit hypercube.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first, termed SearchAsk dataset, contains search sessions that turned into question sessions.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 35,
+                            "text": "SearchAsk dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The retrieval of news content can be thought of as a distributed IR task with only two databases -the web and the news corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 102,
+                            "end": 125,
+                            "text": "web and the news corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our results demonstrate a significant boost in zero-shot SBIR performance over the state-ofthe-art on the challenging Sketchy and TU-Berlin datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 118,
+                            "end": 148,
+                            "text": "Sketchy and TU-Berlin datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, a dataset another order of magnitude larger is necessary to approximate even a small commercial database of music.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our local copy of the Wikipedia database, which is described in Section 3.1, is used to compute the network features.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 40,
+                            "text": "Wikipedia database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We collect a query-based dataset, as mentioned in Section 4.1.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluation of the number of activity clusters (K \u2032 ) with respect to the number of subaction clusters (K) on the Breakfast dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 113,
+                            "end": 130,
+                            "text": "Breakfast dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Now that we have evaluated various Fisher layer configurations on a subset of ILSVRC, we assess the performance of our framework on the full ILSVRC-2010 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 141,
+                            "end": 160,
+                            "text": "ILSVRC-2010 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The boxplots in this figure summarize 50 random permutations of the dataset, giving one-sided significance values of p = 0.02 whenever the Dunn index computed from the original dataset exceeds all of the permutation values.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the offline process, the image database first goes through face detection to identify and locate frontal faces in the images.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This experiment is on the \"peppers\" image from the dataset as seen in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the test sets, the additive noise in the outputs was omitted.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The assessment collection process spanned over a period of four weeks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With a non-trivial adaptation of deep learning methods from other application domains to computational epidemiology, and more specifically by using adjacent graph convolution and a recurrent module, our proposed method shows significant and consistent performance improvement over other representative baseline methods on multiple real-world datasets in our evaluation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Statistics: The COIN dataset consists of 11,827 videos related to 180 different tasks, which were all collected from YouTube.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 28,
+                            "text": "COIN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": We define a full-body graph structure for the LSP dataset, and a upper-body graph structure for the FLIC dataset respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 48,
+                            "end": 59,
+                            "text": "LSP dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 102,
+                            "end": 114,
+                            "text": "FLIC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our second experiment, we evaluate the efficiency of different multi-label learning methods on a large-scale dataset, which consists of around 800K instances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we apply our methods to a standard ranking task, where we treat answers as a semi-structured document collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We then evaluated our method for sketch-based 3D shape retrieval on two large-scale benchmark datasets, i.e., SHREC13 and SHREC14.",
+                    "annotation_spans": [
+                        {
+                            "start": 110,
+                            "end": 117,
+                            "text": "SHREC13",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 122,
+                            "end": 129,
+                            "text": "SHREC14",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We utilize this dataset in much the same way and report full results on the more challenging Caltech Pedestrian Dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 119,
+                            "text": "Caltech Pedestrian Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the full precision-recall curves on the Wikipedia dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 63,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Focusing on LambdaRank as the learning algorithm , we show that similar to evaluation, given a fixed judgment budget for building training sets, better test set performance is obtained when training sets with more queries but shallow judgments per query are used as opposed to training sets with less queries but deep (more) judgments per query.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Further evaluations on additional datasets show that the proposed method achieves verification accuracies of 97.0% and 94.4% on CFP-FP and",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We provide an efficient learning algorithm for Tiled CNNs based on Topographic ICA, and show that learning complex invariant features allows us to achieve highly competitive results for both the NORB and CIFAR-10 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 195,
+                            "end": 221,
+                            "text": "NORB and CIFAR-10 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We continue with the two data sets presented earlier -Optdigits and MNist digit data sets (with the same query-reference split mentioned in Section 2).",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 89,
+                            "text": "-Optdigits and MNist digit data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The BeiRen dataset comes from BeiGuoRenBai 2 , a large retail enterprise in China, which records its supermarket purchase history during the period from Jan. 2013 to Sept. 2013.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 18,
+                            "text": "BeiRen dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With two bias parameters, our method performs remarkably well on two large datasets: ImageNet (1000 classes) and",
+                    "annotation_spans": [
+                        {
+                            "start": 85,
+                            "end": 93,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare VBC and several benchmark algorithms, including VDN , QMIX  and Sched-Net  for controlling allied units.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Taking into account the logarithmic scale, we can see that the number of patterns found reduces significantly on the ILP dataset, but not on the WebKB dataset; WP is similar to WebKB (not shown).",
+                    "annotation_spans": [
+                        {
+                            "start": 117,
+                            "end": 128,
+                            "text": "ILP dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 145,
+                            "end": 158,
+                            "text": "WebKB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The snippet retrieval track of INEX 2011 focuses on how best to generate informative snippets for XML search results, in which the Wikipedia corpus is used.",
+                    "annotation_spans": [
+                        {
+                            "start": 131,
+                            "end": 147,
+                            "text": "Wikipedia corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on the LifeScans dataset demonstrate the performance and influence of key parameters of the predictor.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 46,
+                            "text": "LifeScans dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Nature corpus contains 340K documents and a vocabulary of 4,500 terms; the New York Times corpus contains 1.8M documents and a vocabulary vocabulary of 8,000 terms; the Wikipedia corpus contains 3.6M documents and a vocabulary of 7,700 terms.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "Nature corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 79,
+                            "end": 100,
+                            "text": "New York Times corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 173,
+                            "end": 189,
+                            "text": "Wikipedia corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As described in , the data set included 70 users' UNIX command sequence.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our evaluation, since applying our MT system to TREC collections requires substantial time, we only considered the ROBUST collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 68,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 136,
+                            "text": "ROBUST collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also perform experiments on real scene light fields, including light field datasets (Lytro) released by DPW and light field datasets 1",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 93,
+                            "text": "Lytro",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "keep a collection of N surfaces with codimension one under the factorization assumption that In contrast, we consider only one surface of codimension N",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The single, slight exception to this is represented by the Haberman data set in combination with querying 20% of the data.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 76,
+                            "text": "Haberman data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed modification was evaluated using three TREC newswire and web collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our complete image retrieval system improves upon the previous state-of-the-art by significant margins on the Revisited Oxford and Paris datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 120,
+                            "end": 145,
+                            "text": "Oxford and Paris datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Left: Accuracy of DC vs. IDC on most of the data sets described in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Most partition-based cluster analysis methods (e.g., kmeans) will partition any dataset D into k subsets, regardless of the inherent appropriateness of such a partitioning.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the SEPIC corpus 2 , which includes sentence-level segmentation of 10,921 articles from ACL ARC 1.0, up to February 2007.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 25,
+                            "text": "SEPIC corpus 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar to CIFAR 10 dataset, in our experiments, we select 1000 images for each class and obtain 10000 images in all for our evaluation.",
+                    "annotation_spans": [
+                        {
+                            "start": 11,
+                            "end": 27,
+                            "text": "CIFAR 10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "a collection of a few calibration images observing the user in various poses and viewpoints.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Author \u2192 Paper \u2192 Databases Both graphs are subgraph isomorphic to two transactions in the data and are hence frequent.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In total, our final dataset comprised of 1681 query-question pairs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The learned setting indicates that it was trained on our dataset for fair comparison.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used several major datasets that contain people as a source of images: 1, 548 images from SUN , 33, 790 images from MS COCO , 9, 135 images from Actions 40 , 7, 791 images from PASCAL , 508 images from the Ima-geNet detection challenge  and 198, 097 images from the Places dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 269,
+                            "end": 283,
+                            "text": "Places dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 93,
+                            "end": 96,
+                            "text": "SUN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 119,
+                            "end": 126,
+                            "text": "MS COCO",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 148,
+                            "end": 158,
+                            "text": "Actions 40",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 180,
+                            "end": 186,
+                            "text": "PASCAL",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 209,
+                            "end": 238,
+                            "text": "Ima-geNet detection challenge",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Krimp post-processes a candidate collection, filtering it in a static order.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to the large size of the data, a hyperparameter sweep is computationally prohibitively expensive and we set the LDA hyperparameters \u03b1 = 0.05 and \u03b2 = 0.001 which we found to work well on the ClueWeb12 data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 194,
+                            "end": 212,
+                            "text": "ClueWeb12 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By using subjectspecific random effects, we see that linear mixed effects regression performs better than linear regression for gaze estimation on the three datasets in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Continuing work involves development of confidence intervals based on , development of numerical methods suitable for very large data sets based on Girard's(1991)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Building the Cognos experts database.",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 36,
+                            "text": "Cognos experts database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For these comparisons four smaller TREC datasets were used: the Associated Press portions of disks two and three (AP2,AP3), San Jose Mercury News from disk three (SJM) and Wall Street Journal from disk two (WSJ2).",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 145,
+                            "text": "San Jose Mercury News",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 163,
+                            "end": 166,
+                            "text": "SJM",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 64,
+                            "end": 122,
+                            "text": "Associated Press portions of disks two and three (AP2,AP3)",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 172,
+                            "end": 212,
+                            "text": "Wall Street Journal from disk two (WSJ2)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this subsection, we investigate the impact of the reference dataset by varying the reference dataset of choice.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first further evaluate the simulation quality by comparing the causal discovery results of baseline methods between a real-world dataset and a simulated dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is assumed that the dataset is corrupted by independent Gaussian noise with variance \u03c3 2 and that the latent function is drawn from a Gaussian process with zero mean and covariance determined by the kernel k.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By introducing the pseudo-word, the collection becomes additionally ambiguous and by varying the number and size of pseudo-words, Sanderson found relationships between ambiguity, disambiguation and retrieval effectiveness.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Compared to the softmax loss, although both discriminative loss and softmax loss have the same complexity, Discriminative improves over Softmax by a large margin for all measures on both datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have tested our DNG-MCTS algorithm and compared the results with UCT in three common MDP benchmark domains, namely Canadian traveler problem, racetrack and sailing.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Columns Aab, Aac, indicate the test set AUC of absolute and comparison labels, respectively, when S = Ctrn; we report these only for datasets for which we have comparisons.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the academic domain, we use the ACL anthology reference corpus , and randomly select 150 more recent query documents (papers).",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 66,
+                            "text": "ACL anthology reference corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Office31 dataset, our solution is ranked among the top two performers in four instances out of six and the top performer in two instances.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 19,
+                            "text": "Office31 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The kernel space is assumed to be spanned by all the datapoints in MUSK1 dataset and a subset of the datapoints in MUSK2 dataset (one tenth of the original training set is randomly selected for this purpose).",
+                    "annotation_spans": [
+                        {
+                            "start": 115,
+                            "end": 128,
+                            "text": "MUSK2 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 67,
+                            "end": 80,
+                            "text": "MUSK1 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We focus our analysis by examining a subset of the YELP dataset for the following three different clustering tasks:",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 63,
+                            "text": "YELP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The 4th feature, i.e., the prior probabilities, are estimated from the tweet collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 87,
+                            "text": "tweet collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on unsupervised monocular depth estimation using the KITTI dataset , building upon the model from  (\"Baseline\").",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 74,
+                            "text": "KITTI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In case of the fMRI dataset with p = 228483, we used 20 blocks, and the total number of boundary nodes were only |B| = 8697.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 27,
+                            "text": "fMRI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Applying non-parametric methods for higher dimensional datasets requires exploiting correlations between dimensions .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the TAC 2015 EDL collection, all other mentions were considered non-relevant.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 31,
+                            "text": "TAC 2015 EDL collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There are three levels of our dataset: domain, task and step.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The performance is further evaluated via real world datasets in \u00a7 4.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It has been proven to outperforms other state-of-the-art algorithms on most dataset, and furthermore, due to its space efficiency, it completes successfully the mining tasks on large input datasets and with low support thresholds that cause all the other algorithms to fail.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, when performing searches in versioned document collections, it is often desirable to limit the number of results from the same document that are returned, say, returning only the highest-scoring version of each document.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We showcase the effectiveness of our Grid R-CNN framework on the object detection track of the challenging COCO benchmark .",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 121,
+                            "text": "COCO benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed method solves the cvxMF problem of Ding, Li and Jordan  in an online/streaming fashion, and allows for selecting a collection of \"typical\" representative sets of individual clusters (see ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "MNB, which learns all data, performed worst because the TV data set contained sudden and significant shifts as shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 67,
+                            "text": "TV data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This confirms that combining different query translation approaches is beneficial, and is also robust with respect to the test collection, language, and underlying MT model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As shown in  we replace the source dataset S with the translated versionS, and apply a weight \u221a 1 \u2212 z i to the adversarial loss.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For illustrating our results in this paper we use the adult data set available from the UCI repository",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 102,
+                            "text": "UCI repository",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report results regarding the MSN dataset, averaged over 5 folds, on which we trained \u03bb-MART rankers by optimizing NDCG@50.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 43,
+                            "text": "MSN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "There is distinct improvement of almost 100 times in the computation speed when the size of a class in the aircraft dataset is only 100.",
+                    "annotation_spans": [
+                        {
+                            "start": 107,
+                            "end": 123,
+                            "text": "aircraft dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To compute the generalization error for a given number of principal components retained, we compute the per-vertex Euclidian distance between every sample of the test set and its corresponding model projection and then take the average value over all vertices and test samples.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We present polynomial time heuristic algorithms that make PGEM learning computationally more tractable and therefore amenable to large event datasets, possibly with a large number of event types.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our synthetic dataset, the absolute feature vectors x i \u2208 R d , i \u2208 N , are sampled from a Gaussian distribution N(0, \u03c3 x I d ) with feature dimension d ranging from 20 to 400 and dataset size N ranging from 500 to 15000.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this purpose we use data from the movie and music domains provided in the MovieLens 1M, 1 Netflix, 2 and Last.fm 1K 3 datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 130,
+                            "text": "MovieLens 1M, 1 Netflix, 2 and Last.fm 1K 3 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All images in CUB dataset are preprocessed and cropped to ensure that bounding boxes of birds have greater-than-0.75 objectimage size ratios.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 25,
+                            "text": "CUB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Following  we chose to evaluate the performance of IDC with respect to a labeled data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Because of space constraints, we only report results with the News and Tags datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 84,
+                            "text": "News and Tags datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since we do not have users' explicit ratings in our dataset, we use u's access frequency on i to approximate the rating ri,j in R.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We show that adding L AD to the distillation loss which is an existing information preserving loss consistently outperforms the state-of-the-art performance in the iILSVRC-small and iCIFAR-100 datasets in terms of the overall accuracy of base and incrementally learned classes.",
+                    "annotation_spans": [
+                        {
+                            "start": 164,
+                            "end": 201,
+                            "text": "iILSVRC-small and iCIFAR-100 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A major challenge in real problems is dealing with missing entries in data, due to sensor failure, ad-hoc data collection, or partial knowledge of relationships in a dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To validate the proposed approach we conducted extensive experiments on three large publicly available datasets: song dataset from the Kaggle's MSD challenge and two movie datasets MovieLens and Netflix.",
+                    "annotation_spans": [
+                        {
+                            "start": 181,
+                            "end": 190,
+                            "text": "MovieLens",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 195,
+                            "end": 202,
+                            "text": "Netflix",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On Sina Weibo Conversation Corpus, VHRED, DAWnet and particularly HRED perform badly in terms of BLEUs.",
+                    "annotation_spans": [
+                        {
+                            "start": 3,
+                            "end": 33,
+                            "text": "Sina Weibo Conversation Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We chose this dataset to validate that the knowledge of object commonality learned from PASCAL VOC12 dataset can adapt to the fine-grained species which contain many unusual bird objects.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 108,
+                            "text": "PASCAL VOC12 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments are carried out on the IAM online database  which contains forms of unconstrained English text acquired from a whiteboard.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 54,
+                            "text": "IAM online database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the remainder, we do not consider these datasets further.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the first stage, the relevance between the given user information need and each of the documents in a collection is calculated.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "match the input image to a large dataset of panoramas , and transfer the panorama lighting (obtained through a light classifier) to the image.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The use of fully connected neural networks allowed us to make predictions about the performance based on whether the mutual information is larger or smaller than the log dataset size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the clustering results on the Mix data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 36,
+                            "end": 48,
+                            "text": "Mix data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Column labels likelihood considers the table corpus and is estimated using a Dirichlet-smoothed language model:",
+                    "annotation_spans": [
+                        {
+                            "start": 39,
+                            "end": 51,
+                            "text": "table corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While we focus on learning visual concepts from external visual data, VQA dataset is still a valuable source of learning diverse knowledges.",
+                    "annotation_spans": [
+                        {
+                            "start": 70,
+                            "end": 81,
+                            "text": "VQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These insufficiencies of the data limit the performance of any learning machine or other statistical tool constructed from and applied to the data collection -no",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on the SCOP datasets (  ).",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 28,
+                            "text": "SCOP datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The affinity weight is effective on the Log CoS dataset, but performs poorly on Log IwS. However, topic bias ('T') shows the weakest performance on both datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 55,
+                            "text": "Log CoS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The former contains 12 categories covered by the PASCAL VOC 2012 dataset, while the latter contains the rest.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 72,
+                            "text": "PASCAL VOC 2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The coarse structure of such \"folded\" data sets as  hides their nonlinear structure from greedy optimizers, virtually ensuring that top-down mapping algorithms will become trapped in highly suboptimal solutions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "= 0) is almost always outperformed by McWL on these datasets, confirming the advantage of optimizing the two objectives simultaneously.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While these correlations and distributions are important if our goal is to achieve the best performance on a single domain, the model must learn to be more robust to them if we instead care about \"out of box\" performance on unseen domains, e.g., older TREC collections .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It is necessary to evaluate the model performance on small-scale dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate on the split MNIST benchmark, where the data are split into five tasks, each classifying between two classes, and the model is trained on each task sequentially.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 40,
+                            "text": "MNIST benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While the remaining methods such as Well cannot scale to datasets in this size.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is not surprising since the number of related feature-users pairs is much smaller than the number of unrelated feature-user pairs on these two data sets, and thus the proposed new algorithm is expected to work better.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the same dataset, we performed 5-fold cross validation by randomly sampling 776 images into training and 243 images into test in each fold.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As such, they adopt a different regularization term from ours, which exploits labels in the datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the different parameter combinations used for our datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to the experiments, the proposed model yields much better results on TREC Blog06 and Blog07 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 109,
+                            "text": "TREC Blog06 and Blog07 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From Tables 4, 5 and 8, we can see that using discriminative loss with VGG network significantly boosts the performance on both datasets, e.g., at R@1, it improves over GoogleNet 6.3% and 9.8% for CUB-200-2011 and CAR196, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 197,
+                            "end": 209,
+                            "text": "CUB-200-2011",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 214,
+                            "end": 220,
+                            "text": "CAR196",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These are the same datasets on which SPGP-DR was originally tested , and it is worth mentioning that SPGP-DR's only entry in the competition (for the Temp dataset) was the winning one.",
+                    "annotation_spans": [
+                        {
+                            "start": 150,
+                            "end": 162,
+                            "text": "Temp dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the Matlab version to evaluate on synthetic datasets and to compare against the state-of-the-art algorithms which are also implemented in Matlab.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We collect a multi-source POI dataset from four widely-used tourism platforms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Side-by-side comparison with state-of-the-art (first eight samples from the test set).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Explosive growth in the size of modern datasets has fueled the recent interest in distributed statistical learning.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The sequence signatures for the above protein sequences originate from the InterPro database (ftp://genome-ftp.stanford.edu/pub/yeast/sequence_ similarity/domains/domains.tab) and are determined by an application of the InterProScan tool .",
+                    "annotation_spans": [
+                        {
+                            "start": 75,
+                            "end": 92,
+                            "text": "InterPro database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Sessions in the previous 3 days (days 22-24) were used for training and the rest of the dataset (days 1-21) was used to calculate the aggregated statistics  .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "With this purpose, we randomly select 150 recipes with their associated images from the test set and, for each image, we collect the corresponding real recipe, the top-1 retrieved recipe and our generated recipe.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We collected a dataset of trajectories from an expert controller and vary the number of trajectories our models are trained on.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recently, social features are appearing as explicit userfacing features such as in: (1) annotations, where interest by the searcher's social network is visibly marked on an existing search result (v.s. ); (2) injected results, where social data, such as tweets or status posts, are presented in a manner similar to and within the existing search results, but sourced outside of the web corpus; and (3) independent results, where the social data is presented in a manner not to be mistaken for one of the web search results, such as in a web answer or direct display.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The goal in indexing versioned document collections is to build full-text index structures that allow keyword search across all versions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This experiment resulted in inferior results on the Caltech-101 and VOC2007 datasets, with respect to training without L2 normalization and applying the intra-normalization technique.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 84,
+                            "text": "Caltech-101 and VOC2007 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Human3.6M and our captured dataset; and (3) Hu-man3.6M, our captured dataset, and COCO.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 9,
+                            "text": "Human3.6M",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 44,
+                            "end": 54,
+                            "text": "Hu-man3.6M",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 82,
+                            "end": 86,
+                            "text": "COCO",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To make predictions, aggregation models first use each submodel to generate a prediction based on its training dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each sentence in the Interest Corpus essentially raises a different aspect of the topic for consideration as a sentence of interest to human readers.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 36,
+                            "text": "Interest Corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "which have shown promising comparable results to CNNs in several standard datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A functional causal model (FCM) over a set of variables {Y Causal Misidentification in Policy Learning Benchmarks and Realistic Settings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The behaviour was due to the fact that the book database often contains very similar entries (e.g., different editions of a book), thus forcing users to check if the current result item was substantially different from the books already placed in the basket.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For our experiment we used unigram counts from the Google ngram dataset 9 to calculate a collection model.",
+                    "annotation_spans": [
+                        {
+                            "start": 51,
+                            "end": 73,
+                            "text": "Google ngram dataset 9",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our method on Hands2017 Challenge dataset and NYU dataset, where state-of-the-art performance is shown.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 53,
+                            "text": "Hands2017 Challenge dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 58,
+                            "end": 69,
+                            "text": "NYU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated the performance with label accuracy by using ten-fold cross validation: eight for training, one for development and remaining one for test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments were conducted on three datasets: two standard TREC datasets, and one Wikipedia dataset for federated search based on the ClueWeb.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 76,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 86,
+                            "end": 103,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 138,
+                            "end": 145,
+                            "text": "ClueWeb",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We argue that our experimental design is fair because: 1) we do not use any privileged information from the test set in which results of all methods are reported; 2) all parameters are discovered in the same validation set; 3) our collections are large enough for effective learning, even when we conduct cross-validation in V for parameterization (e.g., results in these preliminaries experiments are basically identical to the ones reported in the test with the discovered parameters); and 4) the very tight confidence intervals reported in our results (Section 5.4) are evidence of low variation and thus learning convergence.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Two main datasets are involved in our evaluation, that are KITTI  and CityScapes .",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 64,
+                            "text": "KITTI",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 70,
+                            "end": 80,
+                            "text": "CityScapes",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The WorldExpo10 dataset was introduced by Zhang et al. , containing 3980 frames from 108 different scenes from the Shanghai 2010 WorldExpo.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "WorldExpo10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They are a condensed, i.e. both concise and lossless, representation of a collection of frequent itemsets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on two ClueWeb09 corpora and four query sets show that in spite of random components, selective search is stable for most queries.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 36,
+                            "text": "ClueWeb09 corpora",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Importantly, the matching function is typically learned using a dataset of image-caption pairs.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Kaggle has released the MetaKaggle dataset 2 , which contains detailed information about competitions, submissions, etc.",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 42,
+                            "text": "MetaKaggle dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the present work, we provide alternative formulations that eliminate the need for a free parameter and we demonstrate signicant empirical speed improvements, particularly on massive datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "RM is : Comparison of diferent ranking algorithms in terms of NDCG@5 on training, test and validation sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed AIRD framework is designed to make additional information available to the detector in the form of retrieval from the reference dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Visual recognition research has achieved major successes in recent years using large datasets and discriminative learning algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Firstly, existing algorithms typically involve iterative optimization which does not scale well to big data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Test Collections and Evaluation Metrics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this work, we utilize Word2Vec and fastText as they do not require dataset specific human annotation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Unsupervised learning seeks to induce good latent representations of a data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "According to the Buckley and Voorhees's work , the moderate completeness of our test collection possibly affects the system ranking based on the evaluation metric: the Kendall correlation between the system ranking based on MAP with incomplete relevance judgments and that with complete relevance judgments was 0.9 on average in three test collections from the Text REtrieval Conference (TREC).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If \u03c1 d is a document quantum language model obtained by ML, its smoothed version is obtained by interpolation with the ML collection quantum language model \u03c1c:",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate PSDDN on TRANCOS dataset to test its generalizability, though it is proposed for person detection and counting.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 36,
+                            "text": "TRANCOS dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "At the same time, as datasets continue to increase in size and complexity, the possibility of inferring sample-specific phenomena by exploiting patterns in these large datasets has driven interest in important scientific problems such as precision medicine .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the privacy preserving approach over a wide range of data sets and metrics.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The second dataset is the Microsoft Web N-gram service , available exclusively in the web service format.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since massive works have reported performance on the Market-1501 dataset, it is simply intractable to compare all of them.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 72,
+                            "text": "Market-1501 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For this task we used the same neural-net transformation as in  and the results for the Abalone dataset are shown in .",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 103,
+                            "text": "Abalone dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The range of values of \u03b1 is determined by the number of data points in the particular data set at hand.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the results on the Enron dataset, which showed that our method improved search efficiency.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 38,
+                            "text": "Enron dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Different from existing works  which require expensive labor to manually annotates attribute labels on person re-id datasets, we transfer attribute information from person attribute datasets to re-id datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Several recent image models have been evaluated on small image patches sampled from the Berkeley segmentation dataset (BSDS300) .",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 127,
+                            "text": "Berkeley segmentation dataset (BSDS300)",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our factorization on a set of public, real-world tensor datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 3 covers the test collections to be used for the workshop.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to show the ability of our method to provide subjectindependent gaze estimation for real data, we perform gaze direction prediction for our Real-Video dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 149,
+                            "end": 167,
+                            "text": "Real-Video dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Reweighting the loss corresponding to positive and negative examples resulted in similar performance to the ranking objective on the INRIA pedestrian data set, but requires a search across an additional parameter.",
+                    "annotation_spans": [
+                        {
+                            "start": 133,
+                            "end": 158,
+                            "text": "INRIA pedestrian data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Run-time can can be linear in the number of nonzero elements in the data, but these methods require multiple passes through the entire dataset to converge, and are not suitable in online settings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test the system on the WSCD 2014 dataset, in which the actual content of the queries and documents are not available due to privacy concerns.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 43,
+                            "text": "WSCD 2014 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, properties \"big\" and \"fast\" of 50 kind animals in AwA dataset are shown in , which describe the size and speed with continuous value.",
+                    "annotation_spans": [
+                        {
+                            "start": 63,
+                            "end": 74,
+                            "text": "AwA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We assume that we have access to one or more annotated datasets with a total of M data instances.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, the running times database ( ) has numerical features; the genre database ( ) has nominal features, and the actor graph database ( ) provides graph relational features.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 39,
+                            "text": "running times database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 72,
+                            "end": 86,
+                            "text": "genre database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 121,
+                            "end": 141,
+                            "text": "actor graph database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to better capture the similarity between Web snippets, a first step consists in building a co-occurence graph based on Dice coefficient calculated over the Google Web1T corpus  from which senses are discovered by word sense induction algorithms.",
+                    "annotation_spans": [
+                        {
+                            "start": 165,
+                            "end": 184,
+                            "text": "Google Web1T corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To test our method on a benchmark where highly optimized first-order methods are available as references, we train ResNet-50 on ImageNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 136,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the Tesla dataset, each time series ((X 1 ) ij , . . . , (X N ) ij ) were normalized prior to learning by subtracting by the mean and dividing by the standard deviation.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 21,
+                            "text": "Tesla dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the accuracies on the newsgroup and Industry data sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 60,
+                            "text": "newsgroup and Industry data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, for the Gov2 collection, the H-WSD method improves the performance (in terms of MAP) for 60% of the queries compared to the WSD baseline, while hurting only 30% of the queries.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 37,
+                            "text": "Gov2 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We analyze the proposed methods on CNN architectures over the CIFAR-10 dataset , which contains 10 classes with 50k training examples and 10k test examples.",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 78,
+                            "text": "CIFAR-10 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We compare online algorithms on N = 1.88 million 8 \u00d7 8 patches, a dense subsampling of all patches from 200 images of the Berkeley Segmentation dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 122,
+                            "end": 151,
+                            "text": "Berkeley Segmentation dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Employing LS-DCUR within challenging applications such as segmentation of fMRI data or topic models for text collections with billions of data points are other promising scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on real-world datasets demonstrate the effectiveness of our approach.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset contains 60 different human action classes that are divided into three major groups: daily actions, mutual actions, and health-related actions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the NIPS data set, we provide some example topics (with top 15 terms) discovered by HDP-LDA and sparseTM in .",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 21,
+                            "text": "NIPS data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments also show that the CISO training sets typically lead to classifiers that significantly outperform classifiers built with traditionally labeled training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 49,
+                            "text": "CISO training sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "provides the details of the collections and topics used.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset contains 1.2 million images for training and 50000 images for validation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Example videos of the \"Wedding Dance\", \"Birthday Party\" and \"Graduation Ceremony\" classes taken from the USAA dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 117,
+                            "text": "USAA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The third dataset is the Nova dataset that was used for the active learning workshop and challenge co-located with AISTATS 2010.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 37,
+                            "text": "Nova dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train the latter on the supervised training data recently made available by the official system evaluation campaign on Twitter Sentiment Analysis organized by Semeval-2015.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, the proposed Rob-GAN outperforms SN-GAN (in terms of inception score) on these two datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Using the same (standard) subset of the WebKB dataset as used in , we obtained classification error rates illustrated in  (left).",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 53,
+                            "text": "WebKB dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Most of these functions can be easily extracted from a sequential version that processes memory resident datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While for the very different OHSUMED collection, a smaller value is better.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 47,
+                            "text": "OHSUMED collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All the methods are trained on the cleaned WebFace  training data and tested on LFW for three times to obtain the average accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 65,
+                            "text": "WebFace  training data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The performance will be given either with recall at equal error rate (EER), positive detection rate at low FPPI, or as classif.by-detection (on TUD shape2), depending on the type of results reported on that dataset thus-far.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We will consider datasets with N training points which form a subset of M points on a full Cartesian product grid.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluated the performance of our proposed models on the UniMobile dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 59,
+                            "end": 76,
+                            "text": "UniMobile dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct experiments on the CUHK03 dataset , which is one of the largest databases for person re-identification.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 44,
+                            "text": "CUHK03 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As with , our dataset comprised 132,833 training phones and 6,831 testing phones.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To answer RQ2, we observe that both the RMSE and correlation results show that BARACO outperforms MT: BARACO achieves better estimates in both absolute and relative terms, except on the TD2004 dataset with the UBM click model for generation, whose special nature has been recognized before.",
+                    "annotation_spans": [
+                        {
+                            "start": 186,
+                            "end": 200,
+                            "text": "TD2004 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used a dataset of approximately 18,000 news events collected from Twitter dating from August 2013 until May 2014.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the purpose of the ablation study, we use the validation dataset of the ILSVRC12 dataset as the reference dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 92,
+                            "text": "ILSVRC12 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In particular, the result on the Swiss-Prot dataset for the (7, 3)-mismatch kernel is very promising and compares well with the best results of the state-of-the-art, but computationally more demanding, profile kernels .",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 51,
+                            "text": "Swiss-Prot dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the complete synthetic control data set (i.e., containing sequences of all 6 classes), on average, a sequence matches one of the features at the root node with a prefix of length 14.54, which is substantially shorter than the length of the whole sequence (60).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Similar to page ranking in general web search, e.g., Google, collection selection plays a vital role in distributed search.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The third type of datasets is generated from ESIM , an open-source event camera simulator.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Because the data collected at our lab showed limited variability in lighting, background composition, and clothing, we used the MIT CBCL pedestrian data set which contains images of 924 unique, roughly aligned pedestrians in a wide variety of environments to estimate the AST.",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 156,
+                            "text": "MIT CBCL pedestrian data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The KD1,KD2 data sets were obtained from the KDDCup data set by discretizing the continuous attributes into 10 and 100 bins respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "KD1,KD2 data sets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 44,
+                            "end": 60,
+                            "text": " KDDCup data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "|D| = 0.0001, which are typical values for a dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on 4 different datasets are conducted, under different teacher-student architectures.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results on the more challenging CIFAR-100 dataset show that adversarially trained Prior Networks yield a more modest increase in robustness over baseline approaches, but it still takes significantly more computational effort to attack the model.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 49,
+                            "text": "CIFAR-100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In our experiment based on the aforementioned test collection, our proposed method showed significant improvements on MAP and nDCG over baseline methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Here, we do not present the accuracy results for the largest Berk-Stan data set as the cost of performing the exhaustive enumeration needed to obtain the accuracy ground-truth is prohibitive on this data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 79,
+                            "text": "Berk-Stan data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested the method on several projective datasets from  and VGG  and compared the results to state-of-theart projective reconstruction pipelines, including: P 2 SfM .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Continuing to abuse the notation from section 2, we can say that the LFW database only samples a small subset of G, which is now the set of all transformations that occur in LFW.",
+                    "annotation_spans": [
+                        {
+                            "start": 69,
+                            "end": 81,
+                            "text": "LFW database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 174,
+                            "end": 177,
+                            "text": "LFW",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As in the case of the precision plots, there are two recall plots for the heterogeneous case of DBLP data set, each of which contributes to a different type of links.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 109,
+                            "text": "DBLP data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(1) Only SR # , MSR survive on all datasets, whereas PSUM, OIP, SR",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This explains the gap of our accuracy gain for two datasets; ADL has remarkable performance to induce the classifier to learn the less discriminative parts, as supported by CUB-200-2011 evaluations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Two benchmark data sets, the Sarcos  and the USPS data sets , were used for regression and classification tests respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 59,
+                            "text": "Sarcos  and the USPS data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By summing up the local supports of itemsets we can determine their global support in the original dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We therefore conclude that a combination of our risk adjustment approach with Jelinek-Mercer smoothing can largely outperform both the Jelinek-Mercer and Dirichlet smoothing methods on all collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train our model on CelebA dataset , consiting of over 100K celebrity faces with wide-range of attributes.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 36,
+                            "text": "CelebA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "That paper showed how to construct a PCFG that generates the same distribution over a collection of documents as an LDA model, and where Bayesian inference for the PCFG's rule probabilities yields the corresponding distributions as Bayesian inference of the corresponding LDA models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "BH-Free is characterized by a high recommendation accuracy: on the Movielens data set, it achieves competitive results with respect to LDA, and it outperforms all competitors on the sample of the Netflix collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 196,
+                            "end": 214,
+                            "text": "Netflix collection",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 67,
+                            "end": 85,
+                            "text": "Movielens data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, we focus here on the case of versioned document collections, i.e., collections where each document is represented by multiple versions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The CSG values for the noisy MNIST datasets are shown on the right of  along side with the test error rate obtained with an AlexNet CNN .",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 43,
+                            "text": "MNIST datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Instead, standard image/video collection is a product of trying to ensure coverage of the target category labels on one hand, and managing resource availability on the other.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Effectiveness: We evaluate our method on a diverse collection of real data sets with thousands of nodes and attributes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The average MAP and P@10 of using faceted feedback on OHSUMED dataset are improved by 32.4% and 43.9% over the baseline (BM25) respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 54,
+                            "end": 69,
+                            "text": "OHSUMED dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "When applied to our results in the mixed setting on VOC 6x2 dataset, this visualization procedure yields clusters that roughly match object categories.",
+                    "annotation_spans": [
+                        {
+                            "start": 52,
+                            "end": 67,
+                            "text": "VOC 6x2 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "It relies on the observation that in a high-dimensional ambient space, while there are many ways that each data point y j can be reconstructed using the entire dataset, a sparse representation selects a few data points from the underlying subspace of y j , since each point in S can be represented using d data points, in general directions, from S .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Merge in external memory the local results obtained from each dataset partition by removing redundancies.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We randomly sampled 966 linked pages from the GOV web document corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 46,
+                            "end": 69,
+                            "text": "GOV web document corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Additionally we perform a user study to compare our results with state-of-the-art based on 80 image pairs from the test set (the indices of the pairs, as well as the results of  were kindly provided by the authors of ).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The final value of the projection depends on the size of the dataset, as will be discussed below.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the \"MovieLens\" dataset, the MLIRM and RDIRM-prod obtained nearly identical values for the number of K \u00d7 L because this dataset was sparse, and a single background layer was sufficient to explain irrelevant entries.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 27,
+                            "text": "\"MovieLens\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results indicate that for both Dataset 1 and Dataset 2, the improvements of the KNN methods over both Single and QC are statistically significant (p-value<0.05).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed method obtains state-of-the-art performance on both the difficult Aachen Day-Night localization dataset and the InLoc indoor localization benchmark, as well as competitive performance on other benchmarks for image matching and 3D reconstruction.",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 116,
+                            "text": "Aachen Day-Night localization dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 125,
+                            "end": 160,
+                            "text": "InLoc indoor localization benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The polynomial ensemble F(s, n, A) is a collection of polynomials f : {\u22121, 1} n \u2192 R satisfying the following conditions",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In large-scale datasets, it is even more typical to encounter the incomplete labeling issues due to the huge cost of labeling.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the purpose of the experiment however, the users were instructed to cut-and-paste sequences of moves from real games because the board positions encountered in real games, when used as queries, have somewhat higher likelihood of retrieving relevant positions from the game database than when using queries from fictitious board positions.",
+                    "annotation_spans": [
+                        {
+                            "start": 272,
+                            "end": 285,
+                            "text": "game database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To optimize this highly nonlinear energy, PatchMatch-based methods  have become popular and achieved many successes in MVS benchmarks.",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 133,
+                            "text": "MVS benchmarks",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This involves computing the likelihood of each optimized fit on a test set and averaging over runs and over divisions of the data into training and test sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This suggests the importance of modeling the hierarchical structure of social media sessions and varying the level of attention to words and comments based on the context, especially when the dataset is small.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These numbers are competitive when compared to similar techniques applied to these collections.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The main contributions in this paper are: 1) a photographic steganography algorithm based on deep learning architectures; 2) development of a new paradigm for cameradisplay imaging systems, CDTF-network; 3) Camera-Display 1M: a dataset of 1,000,000 camera-captured images from 25 camera-display pairs.",
+                    "annotation_spans": [
+                        {
+                            "start": 207,
+                            "end": 224,
+                            "text": "Camera-Display 1M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The transform f \u03b1 learned in the previous section was linear, and we now apply a more sophisticated convolutional neural network to the MNIST dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 136,
+                            "end": 149,
+                            "text": "MNIST dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on Uni-Type Data Sets.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 33,
+                            "text": "Uni-Type Data Sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on three TREC datasets show that 2) The readability and coherence of RLSI topics is equal or better than those learned by LDA, PLSI and LSI.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 34,
+                            "text": "TREC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consequently, there are 10,582 images in the training set and 1,449 images in the validation set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We will show that (1) the similarity scores are robust to including only the 118 images in the Allen brain observatory data set, as well as the specific images within this set, and (2) the similarity scores decrease with neuron subsampling, whereas the pseudo-depth stays constant given enough neurons.",
+                    "annotation_spans": [
+                        {
+                            "start": 95,
+                            "end": 127,
+                            "text": "Allen brain observatory data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our signatures in relation to those in the Inter-Pro database and highlight the differences between them.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 73,
+                            "text": "Inter-Pro database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The improvement of LSCMR on the NUS dataset is not as significant as that on the Wiki dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 43,
+                            "text": "NUS dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 81,
+                            "end": 93,
+                            "text": "Wiki dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, we augment our dataset with images captured in the wild (see  for some examples).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Right: classification error rates on the test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Recently Joachims demonstrated the ability to train a linear SVM over all of the Reuters RCV1 dataset with over 800,000 documents and 47,000 word features in just minutes on a PC .",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 101,
+                            "text": "Reuters RCV1 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Meanwhile, a group of optimal parameters shows the robustness of the CRTER model on all collections used.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The trained model shows impressive detection results on both VehicleID and VeRi-776 dataset, implying a good generalization ability.",
+                    "annotation_spans": [
+                        {
+                            "start": 61,
+                            "end": 91,
+                            "text": "VehicleID and VeRi-776 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "3 summarizes the results of inference on the golf data set in all test trials: Iterating forwardbackward smoothing by means of EP improved the inferred posterior distributions over the latent states.",
+                    "annotation_spans": [
+                        {
+                            "start": 45,
+                            "end": 58,
+                            "text": "golf data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The embeddings are learned for each item or company of a purchased item depending on the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While their methods proved to be efficient on the W3C corpus, they require an amount of data that may not be available in the typical knowledge-intensive organization.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 60,
+                            "text": "W3C corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To benchmark face anti-spoofing methods specifically for unknown attacks, we collect the Spoof in the Wild database with Multiple Attack Types (SiW-M).",
+                    "annotation_spans": [
+                        {
+                            "start": 89,
+                            "end": 115,
+                            "text": "Spoof in the Wild database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Transpose convolution is defined through the convolution by the flipped kernelW conv and P \u22121 denotes inverse pooling -see Appendix D for a : Training results on MNIST with EP benchmarked against BPTT, in the energy-based and prototypical settings.",
+                    "annotation_spans": [
+                        {
+                            "start": 162,
+                            "end": 167,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our final experiment used triplets over 200 randomly chosen images of scenes from the Outdoor Scene Recognition (OSR) data set .",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 126,
+                            "text": "Outdoor Scene Recognition (OSR) data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These functions were shown to be highly effective document quality measures, especially for noisy Web collections .",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 113,
+                            "text": "Web collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe that the proposed SNR-based metric boosts the performance of state-of-the-art metric learning approaches on all the benchmark datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We simulate an online recommendation system based on movie ratings from the Movielens10m and Netflix datasets, each of which provides a sparsely filled user-by-movie rating matrix with ratings out of 5 stars.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 109,
+                            "text": "Movielens10m and Netflix datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In , we have illustrated the variation in influence  flow value with increasing number of seed starting points for the DBLP and Citation data sets respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 119,
+                            "end": 146,
+                            "text": "DBLP and Citation data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A second group of experiments is aimed at extensions of the baseline methods that exploit characteristic features of the UvT Expert Collection; specifically, we propose and evaluate refined expert finding and profiling methods that incorporate topicality and organizational structure.",
+                    "annotation_spans": [
+                        {
+                            "start": 121,
+                            "end": 142,
+                            "text": "UvT Expert Collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the other hand, to assess the similarity between ground truth and generated images for synthetic datasets created using ESIM , each ground truth is matched with the corresponding reconstructed image with the closest timestamp, as mentioned in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The idea of an SVM mixture is not new, although previous attempts such as s paper on Support Vector Mixtures  did not train the SVMs on part of the dataset but on the whole dataset and hence could not overcome the 'Part of this work has been done while Ronan Collobert was at IDIAP, CP 592, rue du",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our data set is from the data-centric track of INEX 2010 , which consists of: 1) the IMDB data set including 1,594,513 movies; 2) 26 query topics (keywords, description, and narrative) created by the track participants (in the final version); and 3) relevance judgments of query-document pairs.",
+                    "annotation_spans": [
+                        {
+                            "start": 47,
+                            "end": 56,
+                            "text": "INEX 2010",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 85,
+                            "end": 98,
+                            "text": "IMDB data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ClueWeb09 Category B consists of crawl seeds (2.5M), English Wikipedia (6.0M) and crawled pages (41.8M) while ClueWeb12 is a 5% sample of the full dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Existing datasets with hand-object interactions are either too small for training deep neural networks  or provide only partial 3D hand or object annotations .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "CIFAR-10 images have a higher dimension than TIMIT (3072 vs. 1845), but the size of the CIFAR-10 training set is only 50,000 compared to 1.1 million examples for TIMIT.",
+                    "annotation_spans": [
+                        {
+                            "start": 88,
+                            "end": 109,
+                            "text": "CIFAR-10 training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 162,
+                            "end": 167,
+                            "text": "TIMIT",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The results show that our proposed URPCR model obtains higher C-index in most of the datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the current study, we leverage a large dataset of real world search logs to perform a large scale characterization of search tasks with a focus on such differences.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "FGVC-Aircraft Benchmark  is a fine-grained classification dataset of 10, 000 images of 100 types of airplanes.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 23,
+                            "text": "FGVC-Aircraft Benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Results of causal discovery algorithms using the real dataset and the simulated dataset with the same sample size. \"",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The general model for the TREC dataset was based on the general model we built from the user experiment and the threshold was determined based on the targeted dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 38,
+                            "text": "TREC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These datasets are mainly in English and they are not necessarily adequate for researches in the context of Chinese.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments conducted on the MNIST, CIFAR-10 and ImageNet datasets indicate that our attack achieves comparable results to the state-of-the-art",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 66,
+                            "text": "MNIST, CIFAR-10 and ImageNet datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A test set is formed from previously removed edges and an equal number of randomly sampled pairs of unconnected nodes.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The 33 MODIS tiles (highlighted as red boxes) that were used for constructing the evaluation dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, in the constructed sentiment lexicons, 401 features are extracted from Amazon dataset, and 1065 are extracted from Yelp dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 94,
+                            "text": "Amazon dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 124,
+                            "end": 136,
+                            "text": "Yelp dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, the scales of these datasets are insufficient to satisfy the hunger of recent data-driven learning methods.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We regard the factual captions\u0177 f as having the \"factual\" style, denoted as s 0 , which would help model training since the large dataset of factual captions can be included in the training data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": 100 collections were created from the Wikipedia dataset of the ClueWeb [13].",
+                    "annotation_spans": [
+                        {
+                            "start": 40,
+                            "end": 72,
+                            "text": "Wikipedia dataset of the ClueWeb",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, while the addition of the adversarial discriminator results in significant improvements-the absolute performance on the held out collections are still modest, even with adversarial regularization.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In data sets where the point matches distribute non-isotropically, we normalize the variance separately in each axis.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For sanity check, we also trained a ComplEx model over this new dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, the effectiveness of the approach on this data set is a direct indicator of the effectiveness of the different distance functions with increasing uncertainty.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Inverse document frequency is a balance factor to evaluate how important a term is to an ad in the advertisement corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 99,
+                            "end": 119,
+                            "text": "advertisement corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First, we merge ingredients if they share the first or last two words (e.g. bacon cheddar cheese is merged into cheddar cheese); then, we cluster the ingredients that have same word in the first or in the last position (e.g. gorgonzola cheese or cheese blend are clustered together into the cheese category); finally we remove plurals and discard ingredients that appear less than 10 times in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "While it is unreasonable to expect this collection to contain the user in the desired pose, and observed exactly from the viewpoint v, it is assumed the calibration set will contain enough information to extrapolate the appearance of the user from the novel viewpoint v.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the pre-processing stage for extracting features from questions and answers is efficient; (ii) manual feature engineering is avoided by using expressive structural kernels; and (iii) our approach is easily adaptable to diverse QA collections, domains and languages as it only relies on shallow syntactic parsers.",
+                    "annotation_spans": [
+                        {
+                            "start": 227,
+                            "end": 241,
+                            "text": "QA collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our experiments on the web datasets show that for P-Rank the decay factor c doesn't seriously affect the similarity accuracy and accuracy of P-Rank is also higher than SimFusion and SimRank.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We train our HRNet-W48 for single person pose estimation on the PoseTrack2017 training set, where the network is initialized by the model pre-trained on COCO dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 90,
+                            "text": "PoseTrack2017 training set",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 153,
+                            "end": 165,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also experimented on multiclass data sets using SVMs with single and multi-layer arc-cosine kernels, as described in section 2.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, we have similar observations in other datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Web-I dataset, the shared topics seem to characterize general information.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 20,
+                            "text": "Web-I dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Furthermore, such methods that try to solve the selection problem via convex programming are usually too computationally intensive for large datasets .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These interval sets alone would be harmonically ambiguous, so we disambiguate them using harmonic labels, which are included in the training data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The first dataset is the Iris flower dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 25,
+                            "end": 44,
+                            "text": "Iris flower dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the datasets for our experiments we use Reuters-21578, OHSUMED-S, and RCV1-v2.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 56,
+                            "text": "Reuters-21578",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 58,
+                            "end": 67,
+                            "text": "OHSUMED-S",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 73,
+                            "end": 80,
+                            "text": "RCV1-v2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In experiments, real datasets from a dynamic and active-pixel vision sensor, DAVIS, which is a joint event and intensity camera , are used.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Caltech-101 image dataset  consists of 101 object categories and one additional class of background images.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 29,
+                            "text": "Caltech-101 image dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Due to space limitations, below we only reported the results on MSN dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 64,
+                            "end": 75,
+                            "text": "MSN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "M: This dataset is a subset of the Movielens dataset with 6, 040 users, 3, 706 movies and 1, 000, 209 ratings.",
+                    "annotation_spans": [
+                        {
+                            "start": 35,
+                            "end": 52,
+                            "text": "Movielens dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The images are less blurred than those in other datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We illustrate our method on two challenging tasks using two benchmark datasets -detecting pedestrians in video sequences from the INRIA-Motion database  and classifying human actions in UCF-Sports dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 186,
+                            "end": 204,
+                            "text": "UCF-Sports dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 130,
+                            "end": 151,
+                            "text": "INRIA-Motion database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This result can be easily generalized to the case of P partitions by first merging the two collections C 1 and C 2 , then merging this partial result with C 3 and so on.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "dataset consists of total 50 diagnostic test accuracy (DTA) systematic reviews.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To understand how much of the annotation time is spent on what, we analyse timings for speaking and moving the mouse on the ILSVRC dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 124,
+                            "end": 138,
+                            "text": "ILSVRC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, it has been shown in recent literature  that different types of norm constraints fit different data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "If the source and target dataset is similar, we view the source significant subgraphs as strong candidates of significant subgraphs in the target dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Most of these works tried to extract a large number of FAQ pairs from the Web, and use the FAQs dataset to do training and retrieval.",
+                    "annotation_spans": [
+                        {
+                            "start": 91,
+                            "end": 103,
+                            "text": "FAQs dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The ImageCLEF-DA dataset  is a benchmark dataset for ImageCLEF 2014 domain adaptation challenge, which contains three domains: Caltech-256 (C),",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 24,
+                            "text": "ImageCLEF-DA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The Cityscapes dataset  contains 2975 training images, 500 validation images, and 1525 test images of resolution 2048 \u00d7 1024 px.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 22,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to evaluate our algorithm, we compare other state-of-the-art feature selection algorithms in 9 real data sets from various domains, including gene expression, general UCI benchmark data, and multimedia data.",
+                    "annotation_spans": [
+                        {
+                            "start": 176,
+                            "end": 194,
+                            "text": "UCI benchmark data",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We fix the background collection model zN , and apply the EM algorithm to estimate the z F R. The detailed EM step is listed in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For datasets that use CLEVR's images, we train a separate Faster R-CNN for multi-class classification and bounding box regression, because the Faster R-CNN trained on Visual Genome did not transfer well to CLEVR.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For datasets satisfying a spectrum of weak to strong properties, we gave query bounds, and showed that a class of clustering functions containing Single-Linkage will find the target clustering under the strongest property.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper, we propose a hybrid model to incorporate three different retrieval techniques that have proven to be effective for the ad-hoc retrieval on the TREC collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 158,
+                            "end": 174,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset and the evaluation tools will continue to be available as web services at the Challenge web site .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Also, although the model describes how the cranium is affected given the age of the subject, it is biased in terms of ethnicity, due to the lack of ethnic diversity in the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The number of features chosen (i. e. through the boosting iterations) is set to 50 for both datasets",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For protein sequence classification under the semi-supervised setting, we also use the Swiss-Prot dataset, a collection of about 100K protein sequences, as an unlabeled dataset, following the setup of .",
+                    "annotation_spans": [
+                        {
+                            "start": 87,
+                            "end": 105,
+                            "text": "Swiss-Prot dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The success of TREC, CLEF, and NTCIR has established the importance of building reusable, large-scale standard test collections in information access research.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As the portion for the validation set increases, the performance drops consistently due to the lack of exemplars (from the old classes) to train the feature layers.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, fundamental difficulty still remains due to the lack of available in-the-wild 3D body or hand datasets that provide paired images and 3D pose data; thus most of the previous methods only demonstrate results in controlled lab environments.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These are implemented in IBM ProbE TM (for Probabilistic Estimation) data mining engine, which is an object-oriented framework for building segmented predictive models from massive, out-of-core training data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The RST Treebank contains Wall Street Journal articles that have been manually annotated with RST structures by Carlson et al.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 37,
+                            "text": "RST Treebank contains Wall Street",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The test part of the 3DMatch data set consists of 8 indoor scenes split into several partially overlapping fragments.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 37,
+                            "text": "3DMatch data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "One way to measure the unimodal biases in VQA datasets is to train an unimodal model which takes only one of the two modalities as input.",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 54,
+                            "text": "VQA datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct an extensive empirical evaluation on several real data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both CIFAR-10 and NUS-WIDE datasets, we randomly sample 1,000 points as query set, 1,000 points as validation set, and all the remaining points as training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 9,
+                            "end": 39,
+                            "text": "CIFAR-10 and NUS-WIDE datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the performance of our HD 3 S model, we benchmark our result on the KITTI stereo dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 80,
+                            "end": 100,
+                            "text": "KITTI stereo dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We constrain the learned noise to be at least 0.1 to regularize the poorly conditioned kernel matrix for the houseelectric dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 109,
+                            "end": 130,
+                            "text": "houseelectric dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Effectiveness:  show the rank correlation between two ranks by a ranking algorithm and AA on Amazon, Bookcrossing, and Epinions datasets, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 93,
+                            "end": 136,
+                            "text": "Amazon, Bookcrossing, and Epinions datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Extensive experiments on real-world Web search data sets have demonstrated great potential and effectiveness of the proposed framework and algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The datasets shows a high variation in terms of categories per tasks and the number of selected object categories varies between 6 and 30 depending on the task.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the complete formulation of L GAN and the gradient penalisation term refer to  \u03b1 \u2190 Pre-train VAE, discard Decoder and retain Encoder (Q) parameters \u03b8, \u03c9, \u2113, \u2190 initialise Generator (G), Discriminator (D), LTF (F) parameters respectively for itr = 1 to iters do X r \u2190 random mini-batch from dataset Z g \u2190 sample noise N",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "EXPERIMENT DESIGN 2.1 Dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Section 2 introduces the process of data collection from multiple sources.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also evaluate our method on MSCOCO dataset to show the generalization ability of our method and results are given in .",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 45,
+                            "text": "MSCOCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the experiment, users selected pairs from the SUN database to be the closest to each other 50% of the time, while the pairs from the Places database were judged to be the most similar only on 17% of the trials.",
+                    "annotation_spans": [
+                        {
+                            "start": 49,
+                            "end": 61,
+                            "text": "SUN database",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 136,
+                            "end": 151,
+                            "text": "Places database",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For large data sets, kernel k-means is very slow, and so we used a streaming method proposed by Chitta et al.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this data set we use 9705 dimensional TFIDF features.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we further analyze the qualitative results, the weights visualization in pointing mechanism, and the effect of the tradeoff parameter \u03bb for novel object captioning task on held-out COCO dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 198,
+                            "end": 210,
+                            "text": "COCO dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To exploit datasets with general objects for tracking, numerous Siamese based trackers",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For querying the WIKI dataset, we extracted the 300 most frequent queries which had a result click on the domain en.wikipedia.org; similarly for UKGOV, we compiled 50 queries which had result hit on .gov.uk domains.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 29,
+                            "text": "WIKI dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 145,
+                            "end": 150,
+                            "text": "UKGOV",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(  shows that gpfm does significantly better than fm on all of the datasets except adom.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the traffic in our dataset does not include the concept of lane-driving, we used the version of CS-LSTM that does not include lane information for a fairer comparison.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ".b shows a performance comparison with previous approaches [1] tested on the same data set (though on a different partition).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We conduct extensive experiments on both synthetic and real datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We first compare the five methods on the Newsgroups data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 41,
+                            "end": 60,
+                            "text": "Newsgroups data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed approach achieves the state-of-the-art results on two challenging benchmarks: the Volleyball dataset  and the Collective Activity dataset .",
+                    "annotation_spans": [
+                        {
+                            "start": 123,
+                            "end": 150,
+                            "text": "Collective Activity dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 95,
+                            "end": 113,
+                            "text": "Volleyball dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "(3) SCENE dataset (UCI benchmark) consisting of 2230 images (1137 training and 1093 test instances) categorized into 6 scene types (with each class consisting of at least 100 positive instances).",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 17,
+                            "text": "SCENE dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 19,
+                            "end": 32,
+                            "text": "UCI benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the past decade, topic modeling , such as Probabilistic Latent Semantic Analysis (PLSA) or Latent Dirichlet Allocations (LDA) , has successfully revealed the thematic structure of collection of documents with exploring the patterns represented by word co-occurrence matrix.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The word embeddings for the experiments reported in this section were obtained on the TREC document collection with the parameter settings as prescribed in , i.e., we embedded the word vector in a 200 dimensional space, and used continuous bag-of-words  with negative sampling.",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 110,
+                            "text": "TREC document collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ensure that questions in our dataset require the agent to navigate and perceive to answer accurately.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We apply our algorithm to latent collaborative retrieval task on Million Song Dataset  which consists of 1,129,318 users, 386,133 songs, and 49,824,519 records; for this task, a ranking algorithm has to optimize an objective function that consists of 386, 133 \u00d7 49, 824, 519 number of pairwise interactions.",
+                    "annotation_spans": [
+                        {
+                            "start": 65,
+                            "end": 85,
+                            "text": "Million Song Dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this table, the column titled \"# Censored\" corresponds to the number of censored instances in each dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since N-grams and Sequential Pattern Mining (which requires a POS tagger) are relatively complicated methods (vs. simple heuristics such as finding question marks and 5W1H words), the computational effort may be impractical for large datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, while reducing the complexity, the deep capsule network must be able to handle richer data sets than MNIST.",
+                    "annotation_spans": [
+                        {
+                            "start": 111,
+                            "end": 116,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We highlight that for the Face dataset on the Gaussian kernel, it took DG 1.92 days, while ISM finished within 0.99 seconds: a 10 5 -fold speed difference.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 38,
+                            "text": "Face dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The last data set was a regression modeling problem, and therefore the classification measure needed to be redefined.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The TREC Web track diversity datasets created to study the problem of novelty and diversity are most suitable to our work.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 37,
+                            "text": "TREC Web track diversity datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We noticed that it shows especially large improvements on large-scale datasets such as on PKU Ve-hicleID, where we improve by 3.6% over the baseline with Margin loss  and surpass the state-of-the-art by 1% in terms of Recall@1 score on the large test set.",
+                    "annotation_spans": [
+                        {
+                            "start": 90,
+                            "end": 104,
+                            "text": "PKU Ve-hicleID",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use the wine quality data set from the UCI repository which includes 1599 measurements of 11 physiochemical properties and a quality variable of red \"Vinho Verde\" .",
+                    "annotation_spans": [
+                        {
+                            "start": 42,
+                            "end": 56,
+                            "text": "UCI repository",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 11,
+                            "end": 32,
+                            "text": "wine quality data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, we compare our approach to the performance of more than 3,500 human machine learning expert teams on a large scale business data set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is possibly caused by the incompleteness of the stream corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 53,
+                            "end": 66,
+                            "text": "stream corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In order to compare to the PixelCNN literature, we additionally train L3C on the ImageNet32 and ImageNet64 datasets , each containing 1 281 151 training images and 50 000 validation images, of 32 \u00d7 32 resp. 64 \u00d7 64 pixels.",
+                    "annotation_spans": [
+                        {
+                            "start": 81,
+                            "end": 115,
+                            "text": "ImageNet32 and ImageNet64 datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We randomly choose 1,193 images as training set and use the remaining as test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To provide such functionality, we propose two techniques to process patent documents on demand and extract terms and key phrases in order to form a query to retrieve relevant documents from the patent corpus.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For training and test data splitting, we follow  to use the original splitting on Netflix and Yahoo!",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 100,
+                            "text": "Netflix and Yahoo!",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each model, we randomly select 100 inputs from the development dataset as attack targets, and compare the output length distributions from random perturbation and PGD attacks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus, the TTI and Hamming-distance content of the codebook remained identical to RC sep , but the spatial contiguity of the stimulus events was broken: that is to say, it was no longer a coherent row or column that flashed during any one epoch, but rather a collection of 6 apparently randomly scattered letters.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the TREC collection, we used the topic title as raw query.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 23,
+                            "text": "TREC collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The TAC 2015 collection has 166 newswire articles and discussion forum threads.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 23,
+                            "text": "TAC 2015 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They do this with adversarial training, which they noted had great difficulty scaling up to the ImageNet corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 96,
+                            "end": 111,
+                            "text": "ImageNet corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "for determining the number of domains K to identify from the multiple training datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experiments on the seven real datasets further corroborate observations made over synthetic data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the proposed technique, we used the following three major data sets :",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We do not outperform the current sate-of-the-art in such datasets, since our algorithm tries to account for heterogeneous agents and weighted interactions even when interactions are sparse and mostly homogeneous.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows samples of the training data set while  shows reconstructions of the same samples using the MAP \u03bb's in the full model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset is formulated as follows : Foursquare users usually report their check-ins of POIs via Twitter.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results on benchmark datasets MovieLens-1M and Amazon Movies show that our proposed Kr Network outperforms state-of-the-art baselines but needs more time for training compared with traditional RNNs.",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 55,
+                            "text": "MovieLens-1M",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 60,
+                            "end": 73,
+                            "text": "Amazon Movies",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As a result, we obtain a rotation-augmented test set with 74,040,000 point clouds in total as the benchmark dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The fundamental idea is that words that are assigned high weights for a given document are good discriminators for that document from the rest of the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For each lake, we created a buffer region of 20 pixels at 500m resolution around the periphery of the water body, and used the buffer region as well as the interior of the water body to construct the evaluation dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results also show that the \"risk-averse\" approach, even without smoothing from the collection statistics, performs as well as three commonly-adopted retrieval models, namely, the Jelinek-Mercer and Dirichlet smoothing methods, and BM25 model.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "provides a brief overview of the GQA components and generation process, and figure 3 presents multiple instances from the dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Finally, for datasets satisfying a spectrum of weak to strong properties, we give query bounds, and show that a class of clustering functions containing Single-Linkage will find the target clustering under the strongest property.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where X X is the set of N training point input positions, and K X,X R N \u00a2N is the kernel covariance matrix evaluated on the training dataset which is a partition of K R M \u00a2M , the covariance matrix evaluated on the full tensor product grid;",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As conjectured in , noisy web collections could be a more discriminative testbed for dependence models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We observe that the model is unable to learn the probability distribution on KITTI, whereas it is capable of learning the distribution on the larger ATG4D. , our approach performs worse than current state-of-the-art bird's eye view detectors on this small dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 82,
+                            "text": "KITTI",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 149,
+                            "end": 154,
+                            "text": "ATG4D",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, the Enron data set -arguably the most widely studied email data set-was only released because of a court order.",
+                    "annotation_spans": [
+                        {
+                            "start": 17,
+                            "end": 31,
+                            "text": "Enron data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "By separately mining with a FCIM algorithm the partitions of a dataset we may in fact generate frequent itemsets that are not globally closed in the whole dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "First for each search query s, we identify the set Q(s) of all questions associated with s in our dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "E.g., on the CIFAR-100 dataset, a state-of-the-art method  achieves only 40.1% accuracy for 1-shot learning, compared to 75.7% for the all-class fully supervised case .",
+                    "annotation_spans": [
+                        {
+                            "start": 13,
+                            "end": 30,
+                            "text": "CIFAR-100 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These references are representative of th state-of-the-art for deep and shallow architectures on these data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Evaluations using real-world datasets demonstrate that WEMAREC outperforms state-of-the-art matrix approximation methods in recommendation accuracy (0.5-11.9% on the MovieLens dataset and 2.2-13.1% on the Netflix dataset) with 3-10X improvement on scalability.",
+                    "annotation_spans": [
+                        {
+                            "start": 166,
+                            "end": 183,
+                            "text": "MovieLens dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 205,
+                            "end": 220,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate each recommendation method on each dataset in two scenarios.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, the inlier ratios of the neighbors of correct matches drop considerably on the more challenging COLMAP dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 105,
+                            "end": 119,
+                            "text": "COLMAP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The development dataset consists only of tweets whereas the final evaluation dataset included also short text messages (SMS in the tables below).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that if significant cluster structure in the original dataset has been destroyed by the random permutations, some of the values of Q(P k 0 ) should fall well outside the range of variation seen in the Q(P k i ) boxplots.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "n(qi,j) is the accumulated Cross Term's average value on each document over the collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The introduction of large scale food datasets, such as Food-101  and Recipe1M , to-gether with a recently held iFood challenge 2 has enabled significant advancements in visual food recognition, by providing reference benchmarks to train and compare machine learning approaches.",
+                    "annotation_spans": [
+                        {
+                            "start": 55,
+                            "end": 63,
+                            "text": "Food-101",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 69,
+                            "end": 77,
+                            "text": "Recipe1M",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Training Stability: For all \u03b8 \u2208 \u0398, V , and all training sets T and T that differ in a single entry, Validation Stability: For all T , \u03b8 \u2208 \u0398, and for all V and V that differ in a single entry, Condition , the training stability condition, bounds the change in the validation score q, when one person's private data in the training set T changes, and the validation set V as well as the value of the random variable R remains the same.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": this is a collection of mobile app, whose price is 0 and has more than 100,000 ratings.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "reports the speedup (parallel time divided by sequential time) for processing Twitter dataset with different numbers of cores in the Intel cluster aforementioned.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 93,
+                            "text": "Twitter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Then, they compute the distribution of each unit activation map over the whole dataset, and determine a threshold for each unit based on its activation distribution.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For many of these models variance estimation is the main computational bottleneck in applications involving large scale datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To show this, we replace the generated noise dataset by independent random draws of 200 positives and 800 negatives from the OHSCAL text classification benchmark having 11,162 cases, available online.",
+                    "annotation_spans": [
+                        {
+                            "start": 125,
+                            "end": 161,
+                            "text": "OHSCAL text classification benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Experimental results across multiple datasets demonstrate that our method can significantly improve the performance in both question detection and answer finding subtasks.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Classification accuracy on the 360 \u2022 -ified test set is used as the evaluation metric.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this paper we present the Probabilistic Matrix Factorization (PMF) model which scales linearly with the number of observations and, more importantly, performs well on the large, sparse, and very imbalanced Netflix dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 209,
+                            "end": 224,
+                            "text": "Netflix dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The WWW-s is a small-sized test case using the 1,085 web pages that     generate an ambiguous name dataset, we clustered author names from the entire DBLP citation data.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate NCN on the RefSeer dataset 2 .",
+                    "annotation_spans": [
+                        {
+                            "start": 23,
+                            "end": 40,
+                            "text": "RefSeer dataset 2",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Note that the model generating the predictions for the above images, which we captured with an ordinary camera, was trained only on images from our synthetic dataset, ObMan.",
+                    "annotation_spans": [
+                        {
+                            "start": 167,
+                            "end": 172,
+                            "text": "ObMan",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given a dataset satisfying Strict Separation, there exists an algorithm which can find the target partitioning for any hypothesis class in O(k)",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given noisy sensor data sets A and B, (1) we first learn sparse structures based on given covariance matrices.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We evaluate our model on the BLOG06 collection using five standard TREC 2008 baselines.",
+                    "annotation_spans": [
+                        {
+                            "start": 29,
+                            "end": 46,
+                            "text": "BLOG06 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Samples from the first two cameras constitute the training set and samples from the other camera constitute the testing dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "subset of the USPS handwritten digits dataset, consisting of 1540 gray scale 16 \u00d7 16 images, rescaled within [",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 45,
+                            "text": "USPS handwritten digits dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The basic information of the used data stream datasets is introduced as follows.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "in the training dataset, we take the |w| records preceding r as r's historical window 7 H t u,i , and compute r's sd t u",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Moreover, the SAVAM dataset  only presented the left view to subjects, such that the acquired eye fixation data cannot be adopted to investigate the stereoscopic video saliency.",
+                    "annotation_spans": [
+                        {
+                            "start": 14,
+                            "end": 27,
+                            "text": "SAVAM dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We demonstrate our method on challenging object recognition datasets, and show that interleaving multiple taxonomic views yields significant accuracy improvements.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Because of space constraints, we only report results with the News and Tags datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 62,
+                            "end": 84,
+                            "text": "News and Tags datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since accuracy is inappropriate as a measure of effectiveness for many of the data sets as the proportion of positives is very small, we measured performance using the F1 score, the harmonic mean of precision and recall.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each dataset contains digits, ranging from 0 to 9.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For both the numeric data sets (left table, top lines) and the network data sets (right table) we see that the training performance of NNC is comparable to the other algorithms.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The snippet retrieval track of INEX 2011 focuses on how best to generate informative snippets for XML search results, in which the Wikipedia corpus is used.",
+                    "annotation_spans": [
+                        {
+                            "start": 31,
+                            "end": 40,
+                            "text": "INEX 2011",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 131,
+                            "end": 147,
+                            "text": "Wikipedia corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "An important aspect of book collections is the perception of each book as a cohesive semantic unit.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To facilitate the task of MMR, we use only a single unseen emoji as the overlaid visual motif on the entire test set (1000 images in total).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the Treebank corpus (left), head words (verbs) are shared, but the nouns split off into many separate specialized categories before feeding into pronoun sinks.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 22,
+                            "text": "Treebank corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To this end, we take ICDM Abstracts dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 43,
+                            "text": "ICDM Abstracts dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The collection contains approximately 4.6 million records, or about a third of the entire database at the time it was collected in 2004 (commonly known as the MEDLINE04 collection).",
+                    "annotation_spans": [
+                        {
+                            "start": 159,
+                            "end": 179,
+                            "text": "MEDLINE04 collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "further demonstrate the importance of temporal information by using longer clips (e.g., 40 frames) and taking advantage of I3D pre-trained on the large-scale video dataset .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In this section, we present numerical experiments on both simulated and real-world Amazon Mechanical Turk datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 83,
+                            "end": 114,
+                            "text": "Amazon Mechanical Turk datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the ranking of instances from HTTP dataset, outliers have long tail in ranking and some of them are not included the labeling set.",
+                    "annotation_spans": [
+                        {
+                            "start": 33,
+                            "end": 45,
+                            "text": "HTTP dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In practice, if the number of dimensions is large enough, kd-tree and other similar data structures require an expensive inspection in the data set, thereby perform no better than an exhaustive linear search that simply compares a query to every data point in the database.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We generate the training dataset for the study as described previously, using emojis  as the visual motifs blended on images of size 256 \u00d7 256 pixels.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For NUS-WIDE data set, we have n = 209, 347 images each represented by a bag-of-words model with d = 500 visual words, and 81 labels.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 21,
+                            "text": "NUS-WIDE data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The value of \u03b2 was fixed to 4, whereas the value of \u03b1 was varied over the different data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is in not the case for the ClueWeb collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 32,
+                            "end": 51,
+                            "text": "ClueWeb collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "6.1; for these and the following tests, we average over a representative dataset of six different shape classes (cat, centaur, dog, hand, human, squirrel) of varying resolution (ranging from 6K to 28K vertices).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For motivating the transductive IDC, consider a data set X that has emerged from a statistical mixture which includes several sources (classes).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the future, we plan to examine the behavior of our algorithm in more skewed datasets and realworld datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This paper presents a novel online video recommendation system called VideoReach, which alleviates users' efforts on finding the most relevant videos according to current viewings without a sufficient collection of user profiles as required in traditional recommenders.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "ref #text  BRISQUE Quantitative evaluation with simulated datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This is a very-fine-grained single category dataset, so we do not have enough data to train a dic-",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The singular value decomposition (SVD) is fundamental to many data modeling/mining algorithms, but SVD algorithms typically have quadratic complexity and require random access to complete data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the 2003-2004 Novelty collection, the response variable is the \"opinion\" (0) or \"event\" (1) classification of each  topic, which is manually assigned by the TREC organizers.",
+                    "annotation_spans": [
+                        {
+                            "start": 8,
+                            "end": 36,
+                            "text": "2003-2004 Novelty collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The instance size statistics for all data sets are summarized in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To select the number of fine-tuning epochs, we use a validation split of the CIFAR-10 training dataset with clean labels and select a value to bring accuracy close to that of Normal Training.",
+                    "annotation_spans": [
+                        {
+                            "start": 77,
+                            "end": 102,
+                            "text": "CIFAR-10 training dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We report evaluation results comparing those various techniques on standard AOR collections in Section 7.",
+                    "annotation_spans": [
+                        {
+                            "start": 76,
+                            "end": 91,
+                            "text": "AOR collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Examples include smoothing from collection statistics , the latent models , and Dirichlet models .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": Examples of our cross-view translation results on two public benchmarks i.e. Dayton  and CVUSA , and on our self-created large-scale benchmark based on Ego2Top .",
+                    "annotation_spans": [
+                        {
+                            "start": 79,
+                            "end": 85,
+                            "text": "Dayton",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 91,
+                            "end": 96,
+                            "text": "CVUSA",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 154,
+                            "end": 161,
+                            "text": "Ego2Top",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the Ghost-SVD algorithm and the EigenRatio algorithm work on binary tasks, they are evaluated only on the Bird, RTE and TREC datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 112,
+                            "end": 139,
+                            "text": "Bird, RTE and TREC datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the drosophila promoters data set, we test the effect of accuracy parameter p 0 on accuracy and average length of prefix in prediction.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 36,
+                            "text": "drosophila promoters data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the AOL and Health Q&A datasets, we employed AMT master workers from the USA and collected 5 judgements for each of the profiles.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 43,
+                            "text": "AOL and Health Q&A datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For instance, on the KITTI benchmark, following , we remove all points higher than 1m above the fictitious LiDAR source (located on top of the autonomous vehicle).",
+                    "annotation_spans": [
+                        {
+                            "start": 21,
+                            "end": 36,
+                            "text": "KITTI benchmark",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Each curve represents an average over 100 runs of the corresponding algorithm on training samples drawn independently and uniformly at random from the whole dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For the location dataset, we also present a selection strategy.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Data Set: We adopted all the 50 queries from the Topic Distillation task of TREC2003 as our query set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We highlight that we were able to achieve a near state-of-the-art performance across datasets while surpassing the results of all the existing capsule network models.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All of our experiments suggest that our results can be improved simply by waiting for faster GPUs and bigger datasets to become available.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Given that the set of comparisons is quadratic, a na\u00efve implementation of the algorithm leads to a complexity of O(N 2 d 2 K), where N is the size of the dataset, d is the dimension of the feature space, and K is the size of the selected set of comparisons.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate the performance of the LOAD model on Wikipedia based on the two event data sets, we use the description of events as query input and evaluate the resulting ranking of dates with respect to the known date.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For Omniglot dataset, we use a batch size of 32 and 16 for 5-way and 20-way classification, respectively.",
+                    "annotation_spans": [
+                        {
+                            "start": 4,
+                            "end": 20,
+                            "text": "Omniglot dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Even if rewards were still simulated, this scenario is more realistic since the values of the parameters were extracted from a real-world dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "However, we do find it does not work well enough for the \"Two Patterns\" dataset if we consider the diminishing returns.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 79,
+                            "text": "\"Two Patterns\" dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We use Yahoo's Webscope L4 high quality \"Manner\" collection .",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 59,
+                            "text": "Yahoo's Webscope L4 high quality \"Manner\" collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Cityscapes dataset  provides a considerable amount of stereo images, while the disparity labels are pre-computed by SGM method .",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 18,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "For example, on Tic-tac-toe data set, the test error becomes 0.63%, more than half the error rate reduction.",
+                    "annotation_spans": [
+                        {
+                            "start": 16,
+                            "end": 36,
+                            "text": "Tic-tac-toe data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Consequently, many practitioners in the field of data mining have abandon deterministic approaches in favor of randomized ones when dealing with today's large-scale data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test our method on TREC collections of varying sizes and types.",
+                    "annotation_spans": [
+                        {
+                            "start": 22,
+                            "end": 38,
+                            "text": "TREC collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To evaluate our method, we compare with the stateof-the-art classification methods on ImageNet dataset (ILSVRC 2012).",
+                    "annotation_spans": [
+                        {
+                            "start": 86,
+                            "end": 102,
+                            "text": "ImageNet dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Therefore, we cannot expect that GMM converges to the output of MM on the sushi dataset, since the consistency results (Corollary 3) assumes that the data is generated under PL.",
+                    "annotation_spans": [
+                        {
+                            "start": 74,
+                            "end": 87,
+                            "text": "sushi dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the statistical view-point, since most of categories have very few training instances which belong to them, it becomes harder to detect them in test set.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "These systems are able to achieve effectiveness close to a search over the entire collection (exhaustive search) while using only a few shards for each  : The proportion of system instances that demonstrated a significant difference using a paired t-test, and the p values when comparing the sample-based IR algorithm proposed by Kulkarni and Callan  at varying CSI sample rates with a deterministic exhaustive search, and with itself (a nondeterministic algorithm) with a CSI sample rate of 4% using the TREC GOV2 dataset and TREC topics 701 -850. query.",
+                    "annotation_spans": [
+                        {
+                            "start": 505,
+                            "end": 522,
+                            "text": "TREC GOV2 dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 527,
+                            "end": 547,
+                            "text": "TREC topics 701 -850",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "\u221e and \u01eb = 0.3 and optimize with PGD  using 40 iterations and stepsize 0.0075 for all datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the next experiment, the proposed sparse feature learning method was trained on 5x5 image patches extracted from the MNIST training set.",
+                    "annotation_spans": [
+                        {
+                            "start": 120,
+                            "end": 138,
+                            "text": "MNIST training set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In Section 5, we analyze our approaches using the Twitter dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 65,
+                            "text": "Twitter dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the results reported in , the MoRM and DfRes approaches both perform feedback using all external resources as well as the target collection, while RM3 only performs feedback using the target collection.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Empirical results on visual retrieval and classification tasks with six benchmark datasets, i.e., MNIST, CIFAR10, SHREC13, SHREC14, ModelNet10, and ModelNet40, demonstrate the superiority of the proposed method.",
+                    "annotation_spans": [
+                        {
+                            "start": 98,
+                            "end": 103,
+                            "text": "MNIST",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 105,
+                            "end": 112,
+                            "text": "CIFAR10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 114,
+                            "end": 121,
+                            "text": "SHREC13",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 123,
+                            "end": 130,
+                            "text": "SHREC14",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 132,
+                            "end": 142,
+                            "text": "ModelNet10",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 148,
+                            "end": 158,
+                            "text": "ModelNet40",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Returning to the point of capturing both specific and general aspects of documents as discussed in the introduction of the paper, we generated 500 queries of length 3-5 using randomly selected lowfrequency words from the NIPS corpus and then ranked documents relative to these queries using several different methods.",
+                    "annotation_spans": [
+                        {
+                            "start": 221,
+                            "end": 232,
+                            "text": "NIPS corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our approach generalizes the traditional collaborative ranking approach by incorporating Twitter features such as content information, and social relation information, so our model fully utilizes the information mentioned on Twitter and can do better personalized recommendations.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We note that below 12.5% of the links (less than 3 incoming links per page), the density is well below the link densities of earlier TREC Web collections.",
+                    "annotation_spans": [
+                        {
+                            "start": 133,
+                            "end": 153,
+                            "text": "TREC Web collections",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "They also built a Social Relation In Video (S-RIV) dataset which contained about 3,000 video clips with multi-label annotation.",
+                    "annotation_spans": [
+                        {
+                            "start": 18,
+                            "end": 58,
+                            "text": "Social Relation In Video (S-RIV) dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The left hand side of  shows plots for the test error residuals (for the Boston housing data set) together with its upper bounds computed using the bound of  and the sample compression bound of Corollary 1.",
+                    "annotation_spans": [
+                        {
+                            "start": 73,
+                            "end": 96,
+                            "text": "Boston housing data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To demonstrate modeled stream utility (MSU), we use the TST 2013 test collection.",
+                    "annotation_spans": [
+                        {
+                            "start": 56,
+                            "end": 80,
+                            "text": "TST 2013 test collection",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "All reported results are based on the performance on the test sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "SVHN and TinyImageNet SVHN is a dataset of 10-class digit photographs, and TinyImageNet is a 200-class subset of ImageNet.",
+                    "annotation_spans": [
+                        {
+                            "start": 0,
+                            "end": 4,
+                            "text": "SVHN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 9,
+                            "end": 26,
+                            "text": "TinyImageNet SVHN",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 75,
+                            "end": 87,
+                            "text": "TinyImageNet",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 113,
+                            "end": 121,
+                            "text": "ImageNet",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Modeling data using low-dimensional representations is a fundamental approach in data analysis, motivated by the inherent redundancy in many datasets and to increase the interpretability of data via dimensionality reduction.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We tested RLSI, NMF, GRLSI and GNMF on the Wikipedia dataset and Web",
+                    "annotation_spans": [
+                        {
+                            "start": 43,
+                            "end": 60,
+                            "text": "Wikipedia dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "the maximum ratios, or distortions (see Section 5.3), incurred by Double Pareto happen around the knees: the ratios, there, are close to 2.8 in the Gutenberg corpus, and to 2.3 in the News corpus.",
+                    "annotation_spans": [
+                        {
+                            "start": 148,
+                            "end": 164,
+                            "text": "Gutenberg corpus",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 184,
+                            "end": 195,
+                            "text": "News corpus",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although our proposed method is designed primarily for large-scale connectomics datasets, we evaluate our method on the popular SNEMI3D challenge dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 128,
+                            "end": 153,
+                            "text": "SNEMI3D challenge dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although we compare with other methods in their preferred resolution and datasets for a fair comparison, we note that one of the unique advantages of our method is that it performs well in various resolutions (in our experiments, from 32 to 224), while others can only work on a limited range of resolutions.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Applied to the International Stroke Trial dataset, we applied model selection to select the learning technique that optimizes predictive accuracy.",
+                    "annotation_spans": [
+                        {
+                            "start": 15,
+                            "end": 49,
+                            "text": "International Stroke Trial dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "From the result we can see that the polynomial degree has different improvement trend on different datasets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The two domains demonstrate distinct distributions because images from SVHN dataset contain cluttered background from streets and cropped digits near the image boundaries.",
+                    "annotation_spans": [
+                        {
+                            "start": 71,
+                            "end": 83,
+                            "text": "SVHN dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus for a given dataset it makes sense not only to do cross-validation of the parameter h of the kernel function but also over different loss functions in order to adapt to possible outliers in the data. .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The dataset was made publicly available by Hubway for the purposes of its Data Visualization Challenge 4 .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The proposed ranker has the same ability to incorporate collection statistics into the estimation.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We test our method on the challenging Cityscapes dataset and show that we achieve top results, surpassing Mask R-CNN with an Average Precision score of 27.6 versus 26.2, at a frame rate of more than 10 fps.",
+                    "annotation_spans": [
+                        {
+                            "start": 38,
+                            "end": 56,
+                            "text": "Cityscapes dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The number a of attributes for each data set is also specified in .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": ": The CRU dataset is a highly gridded dataset containing precipitation for land locations only (red region).",
+                    "annotation_spans": [
+                        {
+                            "start": 6,
+                            "end": 17,
+                            "text": "CRU dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "where T it er is the the number of iterations, N S B P R is the number of pairwise constraints sampled for BPR in each iteration, and m \u00d7 n 2 is the number of all pairwise samples from a dataset of m users and n items .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We perform additional ablation experiments on the UNC dataset to further investigate the relative contribution of each component of our proposed model.",
+                    "annotation_spans": [
+                        {
+                            "start": 50,
+                            "end": 61,
+                            "text": "UNC dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our initial experiments reported here were carried out using Matlab with CVX, which prevented us from evaluating our approach on larger data sets, such as the full LETOR 2.0 data set.",
+                    "annotation_spans": [
+                        {
+                            "start": 164,
+                            "end": 182,
+                            "text": "LETOR 2.0 data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "To our knowledge, no public CQA dataset releases the identities of the voters.",
+                    "annotation_spans": [
+                        {
+                            "start": 28,
+                            "end": 39,
+                            "text": "CQA dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "As measured by teaching assistants, according to the course's evaluative criteria, including originality, practicality, and commercial transfer potential, students using combinFormation on prior work collections were found to develop better inventions than those using Google and Word, and the results were statistically significant .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We also compare ShanghaiTechRGBD with other RGB-D crowd counting datasets in , and we can see that ShanghaiTechRGBD is the most challenging RGB-D crowd counting dataset in terms of the number of images and heads.",
+                    "annotation_spans": [
+                        {
+                            "start": 44,
+                            "end": 73,
+                            "text": "RGB-D crowd counting datasets",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 140,
+                            "end": 168,
+                            "text": "RGB-D crowd counting dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 99,
+                            "end": 115,
+                            "text": "ShanghaiTechRGBD",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 16,
+                            "end": 32,
+                            "text": "ShanghaiTechRGBD",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the classification errors on the test sets for the UCI datasets studied, for both RGB and GB, see  in the appendix for details on the dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 57,
+                            "end": 69,
+                            "text": "UCI datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments on real Learning-to-Rank datasets show that QuickScorer is able to achieve speedups over the best state-of-the-art baseline ranging from 2x to 6.5x. .",
+                    "annotation_spans": [
+                        {
+                            "start": 24,
+                            "end": 49,
+                            "text": "Learning-to-Rank datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "A versioned document collection C is a set of documents d0, . .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "shows the relatedness of attributes in different data sets.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The size of common fMRI data sets and the space of hidden variables in our model makes stochastic inference methods, such as Gibbs sampling, prohibitively slow.",
+                    "annotation_spans": [
+                        {
+                            "start": 19,
+                            "end": 33,
+                            "text": "fMRI data sets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "we select the |T | highest rating set T from the test dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We used an ex-tremely simple approach in which we treated a collection as a single string and took samples of lengths s (say 1024 bytes) at evenly-sized intervals across the collection (with m of say 0.5 Gb or 1.0 Gb).",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Since the resolution of the eye image in Real-Video dataset is lower than MPIIGaze dataset (and other real-world factors), it makes gaze prediction more challenging than on the MPI-IGaze dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 177,
+                            "end": 194,
+                            "text": "MPI-IGaze dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 74,
+                            "end": 90,
+                            "text": "MPIIGaze dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 41,
+                            "end": 59,
+                            "text": "Real-Video dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Thus we introduce a large-scale RGB-D dataset by capturing images from crowded scenes at different places.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "We have illustrated some of our results with representative plots from either the NYSE or S&P500 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 82,
+                            "end": 104,
+                            "text": "NYSE or S&P500 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "The experiments used both Foursquare and Whrrl datasets.",
+                    "annotation_spans": [
+                        {
+                            "start": 26,
+                            "end": 55,
+                            "text": "Foursquare and Whrrl datasets",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "lists the performance of different backbones w/o GLA on the validation set of PASCAL VOC 2012 dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 78,
+                            "end": 101,
+                            "text": "PASCAL VOC 2012 dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In both conditions, stimulus events were repeated with a stimulus onset asynchrony (SOA) of 167 msec, which as close as our hardware could come to recreating the 175-msec SOA of competition III dataset II.",
+                    "annotation_spans": [
+                        {
+                            "start": 178,
+                            "end": 204,
+                            "text": "competition III dataset II",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In the experiments, we use three real-world data stream datasets which have been previously studied by other researchers for data streams learning .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "This dataset, collected from the CDC FluView website 2 , contains the weekly influenza activity levels (from 1 to 10) for all the states in U.S. from 2009 to 2016After removing the states with missing data we kept 29 states remaining in this dataset.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "In contrast to  we neither use a different parameter set for each test dataset  nor do we use one of the test datasets during training .",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Our work involves two key insights: (1) Video prediction can be approached as a stochastic process: we sample a collection of proposals conforming to possible frame distribution at following time stamp, and one can select the final prediction from it.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "On the E. Coli data set, we test the effect of parameter \u03c9 in Equation 3.1 on the accuracy and the early prediction of SCR and GSDT using the setting described before except for varying \u03c9.",
+                    "annotation_spans": [
+                        {
+                            "start": 7,
+                            "end": 23,
+                            "text": "E. Coli data set",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although their system can identify questions and answers as well as other types of threads, their dataset was small and they only provided precision measures in their experimental results.",
+                    "annotation_spans": []
+                }
+            ]
+        },
+        {
+            "body_text": [
+                {
+                    "text": "Although the cardinality of the set of all answers given a QI dataset is potentially infinite, researchers have observed that a set of a few thousand (typically 3000 or so) most frequently occurring answers can account for over 90% of all answers in the VQA dataset.",
+                    "annotation_spans": [
+                        {
+                            "start": 254,
+                            "end": 265,
+                            "text": "VQA dataset",
+                            "type": "dataset_name"
+                        },
+                        {
+                            "start": 59,
+                            "end": 69,
+                            "text": "QI dataset",
+                            "type": "dataset_name"
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/delft/applications/datasetTagger.py b/delft/applications/datasetTagger.py
new file mode 100644
index 00000000..2c111327
--- /dev/null
+++ b/delft/applications/datasetTagger.py
@@ -0,0 +1,311 @@
+import os
+import argparse
+import json
+import time
+import numpy as np
+
+from sklearn.model_selection import train_test_split
+
+from delft.sequenceLabelling import Sequence
+from delft.sequenceLabelling.reader import load_data_and_labels_json_offsets
+from delft.utilities.misc import parse_number_ranges
+
+def configure(architecture, output_path=None, max_sequence_length=-1, batch_size=-1, embeddings_name=None, max_epoch=-1, use_ELMo=False):
+    """
+    Set up the default training parameters based on the model architecture.
+
+    Any of max_sequence_length, batch_size and max_epoch left at -1 is replaced
+    by its default: batch size 20, 60 epochs, and a maximum sequence length of
+    200 for "BERT" architectures (capped at 512) or 1500 otherwise.
+    output_path is accepted but not used here.
+
+    Returns:
+        tuple: (batch_size, max_sequence_length, model_name, embeddings_name,
+        max_epoch, multiprocessing, early_stop)
+    """
+    model_name = 'datasets-' + architecture
+    if use_ELMo:
+        model_name += '-with_ELMo'
+
+    # multiprocessing is only enabled for transformer-based architectures
+    multiprocessing = "BERT" in architecture
+    early_stop = True
+
+    if max_epoch == -1:
+        # default; an explicit value from the caller is kept
+        max_epoch = 60
+    if batch_size == -1:
+        batch_size = 20
+
+    if multiprocessing:
+        # the embeddings name is dropped for architectures with a transformer layer
+        embeddings_name = None
+        if max_sequence_length == -1:
+            max_sequence_length = 200
+        # 512 is the largest sequence for BERT input
+        max_sequence_length = min(max_sequence_length, 512)
+    elif max_sequence_length == -1:
+        # RNN-only architectures
+        max_sequence_length = 1500
+
+    return batch_size, max_sequence_length, model_name, embeddings_name, max_epoch, multiprocessing, early_stop
+
+
+# train a model with all available data
+def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
+               input_path=None, output_path=None, fold_count=1,
+               features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1, use_ELMo=False):
+    """
+    Train a dataset recognition model and save it.
+
+    The corpus is read from input_path (default: the dataseer sentence corpus),
+    10% of it is held out as validation set, and the trained model is saved
+    to output_path when given, otherwise through a plain model.save() call.
+
+    Raises:
+        FileNotFoundError: if the training corpus file does not exist.
+    """
+    if input_path is None:
+        input_path = 'data/sequenceLabelling/datasets/dataseer_sentences.json'
+
+    print('Loading data...')
+    x_all, y_all = load_data_and_labels_json_offsets(input_path)
+    if x_all is None:
+        # the loader signals a missing file with (None, None); stop here
+        # rather than failing later inside train_test_split
+        raise FileNotFoundError("training data file not found: " + input_path)
+
+    x_train, x_valid, y_train, y_valid = train_test_split(x_all, y_all, test_size=0.1, shuffle=True)
+    print(len(x_train), 'train sequences')
+    print(len(x_valid), 'validation sequences')
+
+    batch_size, max_sequence_length, model_name, embeddings_name, max_epoch, multiprocessing, early_stop = configure(
+        architecture, output_path, max_sequence_length, batch_size, embeddings_name, max_epoch, use_ELMo)
+    model = Sequence(model_name, recurrent_dropout=0.50, embeddings_name=embeddings_name,
+                     architecture=architecture, transformer_name=transformer,
+                     max_sequence_length=max_sequence_length, batch_size=batch_size,
+                     fold_number=fold_count, features_indices=features_indices,
+                     max_epoch=max_epoch, use_ELMo=use_ELMo,
+                     multiprocessing=multiprocessing, early_stop=early_stop)
+
+    start_time = time.time()
+    model.train(x_train, y_train, x_valid=x_valid, y_valid=y_valid)
+    runtime = round(time.time() - start_time, 3)
+    print("training runtime: %s seconds " % runtime)
+
+    # saving the model
+    if output_path:
+        model.save(output_path)
+    else:
+        model.save()
+
+
+# split data, train a model and evaluate it
+def train_eval(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
+               input_path=None, output_path=None, fold_count=1,
+               features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1, use_ELMo=False):
+    """
+    Train a dataset recognition model, evaluate it on held-out data and save it.
+
+    Without input_path, the corpora found under data/sequenceLabelling/datasets/
+    are merged (dataseer, NER dataset recognition, and at most the first 1000
+    coleridge sentences). 10% of the data is kept for evaluation and 10% of the
+    remainder for validation. With fold_count other than 1, model.train_nfold is used.
+
+    Raises:
+        FileNotFoundError: if no training data could be loaded.
+    """
+    print('Loading data...')
+    if input_path is None:
+        datasets_dir = "data/sequenceLabelling/datasets/"
+        # optional corpora: (file name, maximum number of sentences to keep or None for all)
+        corpora = [("dataseer_sentences.json", None),
+                   ("ner_dataset_recognition_sentences.json", None),
+                   ("coleridge_sentences.json.gz", 1000)]
+        x_parts, y_parts = [], []
+        for file_name, limit in corpora:
+            if os.path.exists(datasets_dir + file_name):
+                x_part, y_part = load_data_and_labels_json_offsets(datasets_dir + file_name)
+                x_parts.append(x_part[:limit])
+                y_parts.append(y_part[:limit])
+        if not x_parts:
+            raise FileNotFoundError("no training corpus found under " + datasets_dir)
+        x_all = np.concatenate(x_parts, axis=0)
+        y_all = np.concatenate(y_parts, axis=0)
+    else:
+        x_all, y_all = load_data_and_labels_json_offsets(input_path)
+        if x_all is None:
+            raise FileNotFoundError("training data file not found: " + input_path)
+
+    x_train_all, x_eval, y_train_all, y_eval = train_test_split(x_all, y_all, test_size=0.1, shuffle=True)
+    x_train, x_valid, y_train, y_valid = train_test_split(x_train_all, y_train_all, test_size=0.1)
+    print(len(x_train), 'train sequences')
+    print(len(x_valid), 'validation sequences')
+    print(len(x_eval), 'evaluation sequences')
+
+    batch_size, max_sequence_length, model_name, embeddings_name, max_epoch, multiprocessing, early_stop = configure(
+        architecture, output_path, max_sequence_length, batch_size, embeddings_name, max_epoch, use_ELMo)
+    model = Sequence(model_name, recurrent_dropout=0.50, embeddings_name=embeddings_name,
+                     architecture=architecture, transformer_name=transformer,
+                     max_sequence_length=max_sequence_length, batch_size=batch_size,
+                     fold_number=fold_count, features_indices=features_indices,
+                     max_epoch=max_epoch, use_ELMo=use_ELMo,
+                     multiprocessing=multiprocessing, early_stop=early_stop)
+
+    start_time = time.time()
+    if fold_count == 1:
+        model.train(x_train, y_train, x_valid=x_valid, y_valid=y_valid)
+    else:
+        model.train_nfold(x_train, y_train, x_valid=x_valid, y_valid=y_valid)
+    runtime = round(time.time() - start_time, 3)
+    print("training runtime: %s seconds " % runtime)
+
+    print("\nEvaluation:")
+    model.eval(x_eval, y_eval)
+
+    # saving the model (must be called after eval for multiple fold training)
+    if output_path:
+        model.save(output_path)
+    else:
+        model.save()
+
+
+def eval_(input_path=None, architecture=None):
+    """Placeholder: evaluation is not implemented, both arguments are ignored and None is returned."""
+
+
+# annotate a list of texts
+def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=None, use_ELMo=False):
+    """
+    Tag the given texts with the saved dataset model for this architecture.
+
+    The tagging runtime in seconds is stored under the "runtime" key of the
+    result when output_format is 'json', and printed otherwise.
+    """
+    # model name is 'datasets-<architecture>', suffixed when ELMo is used
+    suffix = '-with_ELMo' if use_ELMo else ''
+    tagger = Sequence('datasets' + '-' + architecture + suffix)
+    tagger.load()
+
+    begin = time.time()
+    result = tagger.tag(texts, output_format, features=features)
+    elapsed = round(time.time() - begin, 3)
+
+    if output_format != 'json':
+        print("runtime: %s seconds " % (elapsed))
+        return result
+
+    # assumes the json result supports item assignment (dict-like) - TODO confirm
+    result["runtime"] = elapsed
+    return result
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description = "Trainer for dataset recognition models using the DeLFT library")
+
+    actions = ["train", "train_eval", "eval", "tag"]
+
+    architectures_word_embeddings = [
+                     'BidLSTM', 'BidLSTM_CRF', 'BidLSTM_ChainCRF', 'BidLSTM_CNN_CRF', 'BidGRU_CRF', 'BidLSTM_CNN', 'BidLSTM_CRF_CASING', 
+                     'BidLSTM_CRF_FEATURES', 'BidLSTM_ChainCRF_FEATURES', 
+                     ]
+
+    word_embeddings_examples = ['glove-840B', 'fasttext-crawl', 'word2vec']
+
+    architectures_transformers_based = [
+                    'BERT', 'BERT_CRF', 'BERT_ChainCRF', 'BERT_CRF_FEATURES', 'BERT_CRF_CHAR', 'BERT_CRF_CHAR_FEATURES'
+                     ]
+
+    architectures = architectures_word_embeddings + architectures_transformers_based
+
+    pretrained_transformers_examples = ['bert-base-cased', 'bert-large-cased', 'allenai/scibert_scivocab_cased']
+
+    parser.add_argument("action", choices=actions)
+    parser.add_argument("--fold-count", type=int, default=1, help="Number of fold to use when evaluating with n-fold "
+                                                                  "cross validation.")
+    parser.add_argument("--architecture", default="BidLSTM_CRF", help="Type of model architecture to be used, one of "+str(architectures)+" (default: BidLSTM_CRF)")
+    parser.add_argument("--use-ELMo", action="store_true", help="Use ELMo contextual embeddings") 
+
+    # when neither --embedding nor --transformer is given, glove-840B embeddings are selected after parsing
+    parser.add_argument(
+        "--embedding", 
+        default=None,
+        help="The desired pre-trained word embeddings using their descriptions in the file. " + \
+            "For local loading, use delft/resources-registry.json. " + \
+            "Be sure to use here the same name as in the registry, e.g. " + str(word_embeddings_examples) + \
+            " and that the path in the registry to the embedding file is correct on your system."
+    )
+    parser.add_argument(
+        "--transformer",
+        default=None,
+        help="The desired pre-trained transformer to be used in the selected architecture. " + \
+            "For local loading use, delft/resources-registry.json, and be sure to use here the same name as in the registry, e.g. " + \
+            str(pretrained_transformers_examples) + \
+            " and that the path in the registry to the model path is correct on your system. " + \
+            "HuggingFace transformers hub will be used otherwise to fetch the model, see https://huggingface.co/models " + \
+            "for model names"
+    )
+    parser.add_argument("--output", help="Directory where to save a trained model.")
+    parser.add_argument("--input", help="Grobid data file to be used for training (train action), for training and " +
+                                        "evaluation (train_eval action) or just for evaluation (eval action).")
+    parser.add_argument("--max-sequence-length", type=int, default=-1, help="max-sequence-length parameter to be used.")
+    parser.add_argument("--batch-size", type=int, default=-1, help="batch-size parameter to be used.")
+
+    args = parser.parse_args()
+
+    action = args.action
+    architecture = args.architecture
+    output = args.output
+    input_path = args.input
+    embeddings_name = args.embedding
+    max_sequence_length = args.max_sequence_length
+    batch_size = args.batch_size
+    transformer = args.transformer
+    use_ELMo = args.use_ELMo
+
+    if transformer is None and embeddings_name is None:
+        # default word embeddings
+        embeddings_name = "glove-840B"
+
+    if action == "train":
+        train(embeddings_name=embeddings_name,
+              architecture=architecture,
+              transformer=transformer,
+              input_path=input_path,
+              output_path=output,
+              max_sequence_length=max_sequence_length,
+              batch_size=batch_size,
+              use_ELMo=use_ELMo)
+
+    if action == "eval":
+        if args.fold_count is not None and args.fold_count > 1:
+            print("The argument fold-count argument will be ignored. For n-fold cross-validation, please use "
+                  "it in combination with train_eval")
+        if input_path is None:
+            raise ValueError("A Grobid evaluation data file must be specified to evaluate a grobid model with the parameter --input")
+        eval_(input_path=input_path, architecture=architecture)  # NOTE: eval_ is currently a stub that does nothing
+
+    if action == "train_eval":
+        if args.fold_count < 1:
+            raise ValueError("fold-count should be equal or more than 1")
+        train_eval(embeddings_name=embeddings_name, 
+                architecture=architecture, 
+                transformer=transformer,
+                input_path=input_path, 
+                output_path=output, 
+                fold_count=args.fold_count,
+                max_sequence_length=max_sequence_length,
+                batch_size=batch_size,
+                use_ELMo=use_ELMo)
+
+    if action == "tag":
+        # built-in sample sentences, tagged with the saved model and printed as json
+        someTexts = []
+        someTexts.append("The DEGs were annotated using the following databases: the NR protein database (NCBI), Swiss Prot, Gene Ontology (GO), the Kyoto Encyclopedia of Genes and Genomes (KEGG) database, and the Clusters of Orthologous Groups database (COG) according to the methods of described by Zhou et al")
+        someTexts.append("The electrochemiluminescence immunoassay was used to measure serum concentration of 25-hydroxyvitamin D using Roche Modular E170 Analyzer (Roche Diagnostics, Basel, Switzerland).")
+        someTexts.append("We found that this technique works very well in practice, for the MNIST and NORB datasets (see below).")
+        someTexts.append("We also compare ShanghaiTechRGBD with other RGB-D crowd counting datasets in , and we can see that ShanghaiTechRGBD is the most challenging RGB-D crowd counting dataset in terms of the number of images and heads.")
+        result = annotate_text(someTexts, "json", architecture=architecture, use_ELMo=use_ELMo)
+        print(json.dumps(result, sort_keys=False, indent=4, ensure_ascii=False))
+        
+
diff --git a/delft/sequenceLabelling/reader.py b/delft/sequenceLabelling/reader.py
index 7a7f5f23..68359c18 100644
--- a/delft/sequenceLabelling/reader.py
+++ b/delft/sequenceLabelling/reader.py
@@ -1,12 +1,12 @@
 import numpy as np
 import xml
 from xml.sax import make_parser, handler
-from delft.utilities.Tokenizer import tokenizeAndFilterSimple
+from delft.utilities.Tokenizer import tokenizeAndFilterSimple, tokenizeAndFilter, tokenize
 import re
 import os
 import gzip
 from tqdm import tqdm
-
+import json
 
 class TEIContentHandler(xml.sax.ContentHandler):
     """ 
@@ -670,6 +670,97 @@ def load_data_and_labels_ontonotes(ontonotesRoot, lang='en'):
     return final_tokens, final_label
 
 
+def load_data_and_labels_json_offsets(jsonCorpus, tokenizer=None):
+    """
+    Load data and labels from json corpus where annotations are expressed with offsets. 
+    A token whose start lies in an annotation span is labelled B-<type> when it begins
+    exactly at the span start and I-<type> otherwise; all other tokens are labelled O.
+
+    Note: input file can be gzipped or not
+
+    {
+    "lang": "en",
+    "level": "sentence",
+    "documents": [
+        {
+            "id": "10.1371/journal.pone.0198300",
+            "body_text": [
+                {
+                    "text": "The test was designed so that bacteria were collected at 1 hour and 6 hours after start time on each day of testing.",
+                    "annotation_spans": [
+                        {
+                            "start": 30,
+                            "end": 38,
+                            "text": "bacteria",
+                            "type": "dataset",
+                            "datatype": "Tabular Data:Sample Table"
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+    }
+
+    Args:
+        jsonCorpus: path to the json corpus file (a ".gz" suffix selects gzip reading)
+        tokenizer: callable returning (tokens, offsets) for a text, each offset holding the
+            start and end character positions of its token; defaults to tokenizeAndFilter
+
+    Returns:
+        tuple(numpy array, numpy array): data and labels, or (None, None) when the
+        corpus file does not exist
+    """
+    if not os.path.exists(jsonCorpus):
+        print("Invalid path file: ", jsonCorpus)
+        return None, None
+
+    if tokenizer is None:
+        tokenizer = tokenizeAndFilter
+
+    # the context manager closes the file even when the json cannot be parsed
+    opener = gzip.open if jsonCorpus.endswith(".gz") else open
+    with opener(jsonCorpus, "rt", encoding="utf-8") as corpus_file:
+        jsonDocuments = json.load(corpus_file)
+
+    all_tokens = []
+    all_labels = []
+    documents = jsonDocuments["documents"] if "documents" in jsonDocuments else []
+    for jsonDocument in documents:
+        for text_piece in jsonDocument.get("body_text", []):
+            if "text" not in text_piece:
+                continue
+            local_tokens, local_offsets = tokenizer(text_piece["text"])
+
+            spans = []
+            for annotation_span in text_piece.get("annotation_spans", []):
+                if "type" not in annotation_span:
+                    # no label can be derived from an untyped span, ignore it
+                    continue
+                local_type = annotation_span["type"].replace(" ", "_")
+                spans.append((annotation_span["start"], annotation_span["end"], local_type))
+
+            tokens, labels = [], []
+            for local_token, offset in zip(local_tokens, local_offsets):
+                label = "O"
+                for start, end, local_type in spans:
+                    # the first matching span decides the label of the token
+                    if start <= offset[0] and (offset[1] <= end or offset[0] < end):
+                        label = ("B-" if start == offset[0] else "I-") + local_type
+                        break
+                tokens.append(local_token)
+                labels.append(label)
+            all_tokens.append(tokens)
+            all_labels.append(labels)
+
+    # sentences differ in length: recent NumPy versions refuse to build an array
+    # from such ragged nested sequences unless dtype=object is given
+    final_tokens = np.asarray(all_tokens, dtype=object)
+    final_labels = np.asarray(all_labels, dtype=object)
+
+    return final_tokens, final_labels
+
 if __name__ == "__main__":
     # some tests
     xmlPath = '../../data/sequenceLabelling/toxic/train.xml'
diff --git a/delft/utilities/Tokenizer.py b/delft/utilities/Tokenizer.py
index 8aa48dc4..ddb152ed 100644
--- a/delft/utilities/Tokenizer.py
+++ b/delft/utilities/Tokenizer.py
@@ -3,16 +3,25 @@
 # Generic simple tokenizer for Indo-European languages
 # also python side of GROBID default tokenizer, used for Indo-European languages
 
-delimiters = "\n\r\t\f\u00A0([ •*,:;?.!/)-−–‐\"“”‘’'`$]*\u2666\u2665\u2663\u2660\u00A0"
+delimiters = "\n\r\t\f\u00A0([ •*,:;?.!/)-−–‐\"“”‘’'`$]*\u2666\u2665\u2663\u2660\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B"
 regex = '|'.join(map(re.escape, delimiters))
 pattern = re.compile('('+regex+')') 
 # additional parenthesis above are for capturing delimiters and keep then in the token list
 
-blanks = ' \t\n'
+blanks = ' \t\n\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B'
 
-def tokenizeAndFilter(text):
+def tokenize(text):
     """
-    Tokenization following the above pattern with offset information
+    Tokenization following the above pattern with offset information and keep 
+    blank characters (space family) as tokens
+    """
+    return tokenizeAndFilter(text, filterBlank=False)
+
+
+def tokenizeAndFilter(text, filterBlank=True):
+    """
+    Tokenization following the above pattern with offset information and with 
+    filtering of blank characters (space family)
     """
     offset = 0
     offsets = []
@@ -27,7 +36,7 @@ def tokenizeAndFilter(text):
     finalOffsets = []
     i = 0
     for token in tokens:
-        if token not in blanks:
+        if not filterBlank or token not in blanks:
             finalTokens.append(token)
             finalOffsets.append(offsets[i])
         i += 1
@@ -51,6 +60,6 @@ def tokenizeAndFilterSimple(text):
 
     return finalTokens
 
-
 def filterSpace(token):
-    return (token not in blanks)
\ No newline at end of file
+    return (token not in blanks)
+    
\ No newline at end of file
diff --git a/doc/datasets_scores_0.3.1.txt b/doc/datasets_scores_0.3.1.txt
new file mode 100644
index 00000000..36120408
--- /dev/null
+++ b/doc/datasets_scores_0.3.1.txt
@@ -0,0 +1,42 @@
+dataseer_sentences.json + ner_dataset_recognition_sentences.json
++ 10% coleridge ner_dataset_recognition_sentences.json
+
+11474 train sequences
+1275 validation sequences
+1417 evaluation sequences
+
+(around 3000 negative sentences)
+
+> python3 delft/applications/datasetTagger.py train_eval --architecture BidLSTM_CRF --use-ELMo --fold-count 1
+
+----------------------------------------------------------------------
+
+** Worst ** model scores - run 6
+                  precision    recall  f1-score   support
+
+     data_device     0.4483    0.5361    0.4883        97
+         dataset     0.7049    0.6624    0.6830       927
+    dataset_name     0.8816    0.8948    0.8882       466
+
+all (micro avg.)     0.7418    0.7268    0.7342      1490
+
+
+** Best ** model scores - run 7
+                  precision    recall  f1-score   support
+
+     data_device     0.5397    0.3505    0.4250        97
+         dataset     0.7269    0.6861    0.7059       927
+    dataset_name     0.8979    0.9056    0.9017       466
+
+all (micro avg.)     0.7756    0.7329    0.7536      1490
+
+----------------------------------------------------------------------
+
+Average over 10 folds
+                  precision    recall  f1-score   support
+
+     data_device     0.5191    0.3794    0.4261        97
+         dataset     0.7185    0.6715    0.6938       927
+    dataset_name     0.8904    0.8946    0.8924       466
+
+all (micro avg.)     0.7635    0.7223    0.7421