-
Notifications
You must be signed in to change notification settings - Fork 83
/
Copy pathread_xml.Rd
79 lines (64 loc) · 2.63 KB
/
read_xml.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xml_parse.R
\name{read_xml}
\alias{read_xml}
\alias{read_html}
\alias{read_xml.character}
\alias{read_xml.raw}
\alias{read_xml.connection}
\title{Read HTML or XML.}
\usage{
read_xml(x, encoding = "", ..., as_html = FALSE, options = "NOBLANKS")
read_html(x, encoding = "", ..., options = c("RECOVER", "NOERROR",
"NOBLANKS"))
\method{read_xml}{character}(x, encoding = "", ..., as_html = FALSE,
options = "NOBLANKS")
\method{read_xml}{raw}(x, encoding = "", base_url = "", ...,
as_html = FALSE, options = "NOBLANKS")
\method{read_xml}{connection}(x, encoding = "", n = 64 * 1024,
verbose = FALSE, ..., base_url = "", as_html = FALSE,
options = "NOBLANKS")
}
\arguments{
\item{x}{A string, a connection, or a raw vector.
A string can be either a path, a url or literal xml. Urls will
be converted into connections either using \code{base::url} or, if
installed, \code{curl::curl}. Local paths ending in \code{.gz},
\code{.bz2}, \code{.xz}, \code{.zip} will be automatically uncompressed.
If a connection, the complete connection is read into a raw vector before
being parsed.}
\item{encoding}{Specify a default encoding for the document. Unless
otherwise specified XML documents are assumed to be in UTF-8 or
UTF-16. If the document is not UTF-8/16, and lacks an explicit
encoding directive, this allows you to supply a default.}
\item{...}{Additional arguments passed on to methods.}
\item{as_html}{Optionally parse an xml file as if it's html.}
\item{options}{Set parsing options for the libxml2 parser. Zero or more of
\Sexpr[results=rd]{xml2:::describe_options(xml2:::xml_parse_options())}}
\item{base_url}{When loading from a connection, raw vector or literal
html/xml, this allows you to specify a base url for the document. Base
urls are used to turn relative urls into absolute urls.}
\item{n}{If \code{x} is a connection, the number of bytes to read per
iteration. Defaults to 64kb.}
\item{verbose}{When reading from a slow connection, this prints some
output on every iteration so you know it's working.}
}
\value{
An XML document. HTML is normalised to valid XML - this may not
be exactly the same transformation performed by the browser, but it's
a reasonable approximation.
}
\description{
Read HTML or XML.
}
\examples{
# Literal xml/html is useful for small examples
read_xml("<foo><bar /></foo>")
read_html("<html><title>Hi<title></html>")
read_html("<html><title>Hi")
# From a local path
read_html(system.file("extdata", "r-project.html", package = "xml2"))
# From a url
cd <- read_xml(xml2_example("cd_catalog.xml"))
me <- read_html("http://had.co.nz")
}