diff --git a/Abot/Abot.csproj b/Abot/Abot.csproj
index 1e7ed04b..ea0c5bdd 100644
--- a/Abot/Abot.csproj
+++ b/Abot/Abot.csproj
@@ -75,6 +75,7 @@
+
diff --git a/Abot/Core/PageRequester.cs b/Abot/Core/PageRequester.cs
index b34cb5c9..189b9674 100644
--- a/Abot/Core/PageRequester.cs
+++ b/Abot/Core/PageRequester.cs
@@ -103,7 +103,7 @@ public virtual CrawledPage MakeRequest(Uri uri, Func
                 crawledPage.RequestCompleted = DateTime.Now;
                 if (response != null)
                 {
-                    crawledPage.HttpWebResponse = response;
+                    crawledPage.HttpWebResponse = new HttpWebResponseWrapper(response);
                     CrawlDecision shouldDownloadContentDecision = shouldDownloadContent(crawledPage);
                     if (shouldDownloadContentDecision.Allow)
                     {
diff --git a/Abot/Poco/CrawledPage.cs b/Abot/Poco/CrawledPage.cs
index 9b33ecc3..e1c49745 100644
--- a/Abot/Poco/CrawledPage.cs
+++ b/Abot/Poco/CrawledPage.cs
@@ -3,6 +3,7 @@
 using log4net;
 using System;
 using System.Collections.Generic;
+using System.Collections.Specialized;
 using System.Net;
 
 namespace Abot.Poco
@@ -47,7 +48,7 @@ public CrawledPage(Uri uri)
         /// <summary>
         /// Web response from the server. NOTE: The Close() method has been called before setting this property.
         /// </summary>
-        public HttpWebResponse HttpWebResponse { get; set; }
+        public HttpWebResponseWrapper HttpWebResponse { get; set; }
 
         /// <summary>
         /// The web exception that occurred during the crawl
diff --git a/Abot/Poco/HttpWebResponseWrapper.cs b/Abot/Poco/HttpWebResponseWrapper.cs
new file mode 100644
index 00000000..6989ca78
--- /dev/null
+++ b/Abot/Poco/HttpWebResponseWrapper.cs
@@ -0,0 +1,144 @@
+using System;
+using System.Collections.Generic;
+using System.Collections.Specialized;
+using System.IO;
+using System.Linq;
+using System.Net;
+using System.Text;
+
+namespace Abot.Poco
+{
+    /// <summary>Transport-independent view of an HTTP response consumed by Abot.</summary>
+    /// <remarks>
+    /// We use this wrapper class to enable using responses obtained by means other than executing an HttpWebRequest.
+    /// E.g. one may use a browser control embedded in the application to get a page's content and construct an
+    /// instance of this class to pass it to Abot.
+    /// </remarks>
+    public class HttpWebResponseWrapper
+    {
+        private readonly HttpWebResponse _internalResponse;
+        private readonly byte[] _content;
+        private readonly Lazy<Stream> _contentStream;
+
+        #region Constructors
+
+        /// <summary>
+        /// Constructs a response based on the received system http response. The response's metadata is
+        /// copied into this wrapper's properties; the body is still read from the wrapped response.
+        /// </summary>
+        /// <param name="response">Response to wrap. When null, an empty wrapper is created.</param>
+        public HttpWebResponseWrapper(HttpWebResponse response)
+        {
+            _internalResponse = response;
+
+            if (response == null)
+            {
+                return;
+            }
+
+            StatusCode = response.StatusCode;
+            ContentType = response.ContentType;
+            ContentLength = response.ContentLength;
+            Headers = response.Headers;
+            CharacterSet = response.CharacterSet;
+            ContentEncoding = response.ContentEncoding;
+            Cookies = response.Cookies;
+            IsFromCache = response.IsFromCache;
+            IsMutuallyAuthenticated = response.IsMutuallyAuthenticated;
+            LastModified = response.LastModified;
+            Method = response.Method;
+            ProtocolVersion = response.ProtocolVersion;
+            ResponseUri = response.ResponseUri;
+            Server = response.Server;
+            StatusDescription = response.StatusDescription;
+        }
+
+        /// <summary>
+        /// Constructs a response based on custom parameters.
+        /// Receives the parameters that necessarily have to be set for Abot to work.
+        /// </summary>
+        /// <param name="statusCode">Status code to report.</param>
+        /// <param name="contentType">Content type to report.</param>
+        /// <param name="content">Raw response body; may be null, in which case the content length is 0.</param>
+        /// <param name="headers">Response headers; may be null.</param>
+        public HttpWebResponseWrapper(HttpStatusCode statusCode, string contentType, byte[] content, NameValueCollection headers)
+        {
+            StatusCode = statusCode;
+            Headers = headers;
+            ContentType = contentType;
+            ContentLength = content != null ? content.Length : 0;
+            _content = content;
+            // Deferred so that no stream is allocated unless the body is actually requested.
+            _contentStream = new Lazy<Stream>(() => _content != null ? new MemoryStream(_content) : null);
+        }
+
+        /// <summary>Constructs an empty response to be filled later.</summary>
+        public HttpWebResponseWrapper() { }
+
+        #endregion
+
+        #region Properties
+
+        /// <summary>Status code returned by the server</summary>
+        public HttpStatusCode StatusCode { get; set; }
+        /// <summary>Server designated type of content</summary>
+        public string ContentType { get; set; }
+        /// <summary>Server designated length of content in bytes</summary>
+        public long ContentLength { get; set; }
+        /// <summary>Collection of headers in the response</summary>
+        public NameValueCollection Headers { get; set; }
+        /// <summary>Gets or sets the character set of the response.</summary>
+        public string CharacterSet { get; set; }
+        /// <summary>Gets or sets the method that is used to encode the body of the response.</summary>
+        public string ContentEncoding { get; set; }
+        /// <summary>Gets or sets the cookies that are associated with this response.</summary>
+        public CookieCollection Cookies { get; set; }
+        /// <summary>Was the response generated from the local cache?</summary>
+        public bool IsFromCache { get; set; }
+        /// <summary>Gets or sets a System.Boolean value that indicates whether both client and server were authenticated.</summary>
+        public bool IsMutuallyAuthenticated { get; set; }
+        /// <summary>Gets or sets the last date and time that the contents of the response were modified.</summary>
+        public DateTime LastModified { get; set; }
+        /// <summary>Gets or sets the method that is used to return the response.</summary>
+        public string Method { get; set; }
+        /// <summary>Gets or sets the version of the HTTP protocol that is used in the response.</summary>
+        public Version ProtocolVersion { get; set; }
+        /// <summary>Gets or sets the URI of the Internet resource that responded to the request.</summary>
+        public Uri ResponseUri { get; set; }
+        /// <summary>Gets or sets the name of the server that sent the response.</summary>
+        public string Server { get; set; }
+        /// <summary>Gets or sets the status description returned with the response.</summary>
+        public string StatusDescription { get; set; }
+
+        #endregion
+
+        #region Stream Methods
+
+        /// <summary>Gets the actual response data.</summary>
+        /// <returns>
+        /// The wrapped response's stream when one was supplied; otherwise a stream over the custom content,
+        /// or null when this wrapper has no content.
+        /// </returns>
+        public Stream GetResponseStream()
+        {
+            if (_internalResponse != null)
+            {
+                return _internalResponse.GetResponseStream();
+            }
+
+            // Only the custom-content constructor creates _contentStream; the other constructors leave it
+            // null, so guard here instead of throwing a NullReferenceException.
+            return _contentStream != null ? _contentStream.Value : null;
+        }
+
+        /// <summary>Gets the header with the given name.</summary>
+        /// <param name="header">Name of the header to look up.</param>
+        /// <returns>The header value, or null when there are no headers or the header is absent.</returns>
+        public string GetResponseHeader(string header)
+        {
+            return Headers != null ? Headers[header] : null;
+        }
+
+        #endregion
+    }
+}