Skip to content

Commit

Permalink
LibWeb: Add start of HTML Tokenizer in Swift
Browse files Browse the repository at this point in the history
Currently it's just a Token class.
  • Loading branch information
ADKaster committed Aug 24, 2024
1 parent d0bc266 commit fb074f9
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 1 deletion.
5 changes: 5 additions & 0 deletions Tests/LibWeb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@ if (ENABLE_SWIFT)
target_link_libraries(TestLibWebSwiftBindings PRIVATE AK LibWeb)
target_compile_options(TestLibWebSwiftBindings PRIVATE -parse-as-library)
add_test(NAME TestLibWebSwiftBindings COMMAND TestLibWebSwiftBindings)

# Swift test executable for HTMLToken, built from the single source file TestHTMLTokenizerSwift.swift.
add_executable(TestHTMLTokenizerSwift TestHTMLTokenizerSwift.swift)
# The test imports the AK and LibWeb modules, so link both libraries privately.
target_link_libraries(TestHTMLTokenizerSwift PRIVATE AK LibWeb)
# -parse-as-library: the test source declares its entry point with @main instead of top-level code.
target_compile_options(TestHTMLTokenizerSwift PRIVATE -parse-as-library)
# Register the executable with CTest under the same name.
add_test(NAME TestHTMLTokenizerSwift COMMAND TestHTMLTokenizerSwift)
endif()
58 changes: 58 additions & 0 deletions Tests/LibWeb/TestHTMLTokenizerSwift.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) 2024, Andrew Kaster <[email protected]>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

import AK
import LibWeb
import SwiftLibWeb
import Foundation

/// A `TextOutputStream` that forwards everything written to it to the process's standard error.
///
/// Intended to be passed as the `to:` argument of `print`.
class StandardError: TextOutputStream {
    /// Encodes `string` as UTF-8 and writes the bytes to standard error.
    ///
    /// A failed write traps: there is nowhere left to report it.
    func write(_ string: Swift.String) {
        let utf8Bytes = Data(string.utf8)
        let stderrHandle = FileHandle.standardError
        try! stderrHandle.write(contentsOf: utf8Bytes)
    }
}

/// Stand-alone test executable for the Swift `HTMLToken` type.
///
/// Each test reports progress on standard error and traps via `precondition` on failure,
/// so a non-zero exit status signals a failing test to CTest.
@main
struct TestHTMLTokenizerSwift {

    /// Writes a single line to standard error.
    private static func log(_ message: Swift.String) {
        var standardError = StandardError()
        print(message, to: &standardError)
    }

    /// Checks that a default-constructed token is not a character token, and that it becomes one
    /// once its `type` is set to `.Character`.
    static func testTokenTypes() {
        log("Testing HTMLToken types...")

        let defaultToken = HTMLToken()
        // Verify the starting state first; otherwise the assignment below could mask a wrong default.
        precondition(!defaultToken.isCharacter(), "default-constructed token must not be a character token")

        defaultToken.type = .Character(codePoint: "a")
        precondition(defaultToken.isCharacter(), "token with a .Character type must report isCharacter()")

        log("HTMLToken types pass")
    }

    /// Checks `isParserWhitespace()` against the five parser whitespace characters and a sample of letters.
    static func testParserWhitespace() {
        log("Testing HTMLToken parser whitespace...")

        for codePoint: Character in ["\t", "\n", "\r", "\u{000C}", " "] {
            let token = HTMLToken(type: .Character(codePoint: codePoint))
            precondition(token.isParserWhitespace(), "expected parser whitespace: \(codePoint.debugDescription)")
        }

        for codePoint: Character in ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] {
            let token = HTMLToken(type: .Character(codePoint: codePoint))
            precondition(!token.isParserWhitespace(), "expected non-whitespace: \(codePoint.debugDescription)")
        }

        log("HTMLToken parser whitespace pass")
    }

    /// Entry point: runs every test in order. Any failed precondition aborts the process.
    static func main() {
        log("Starting test suite...")

        testTokenTypes()
        testParserWhitespace()

        log("All tests pass")
    }
}
2 changes: 1 addition & 1 deletion Tests/LibWeb/TestLibWebSwiftBindings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ struct TestLibWebSwiftBindings {

print("All tests pass", to: &standardError)
}
}
}
27 changes: 27 additions & 0 deletions Userland/Libraries/LibWeb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -794,4 +794,31 @@ if (ENABLE_SWIFT)
list(APPEND LIBWEB_ALL_GENERATED_HEADERS ${generated_headers})

generate_clang_module_map(LibWeb GENERATED_FILES ${LIBWEB_ALL_GENERATED_HEADERS})

# Require C++23 for LibWeb; PUBLIC propagates the requirement to targets that link LibWeb.
target_compile_features(LibWeb PUBLIC cxx_std_23)

# Compile the Swift sources as part of the LibWeb target itself.
target_sources(LibWeb PRIVATE
HTML/Parser/HTMLToken.swift
)
# LIBWEB_USE_SWIFT is defined only while compiling LibWeb's own sources (PRIVATE).
target_compile_definitions(LibWeb PRIVATE LIBWEB_USE_SWIFT)
# Swift code in this target is exposed as the module "SwiftLibWeb" (this is what clients `import`).
set_target_properties(LibWeb PROPERTIES Swift_MODULE_NAME "SwiftLibWeb")

# FIXME: These should be pulled automatically from interface compile options for the target
# Each -Xcc forwards the following -ivfsoverlay<file> argument to the Clang instance embedded in swiftc;
# the overlays listed are the ones for LibWeb, LibGfx and AK.
set(VFS_OVERLAY_OPTIONS
-Xcc -ivfsoverlay${CMAKE_CURRENT_BINARY_DIR}/vfs_overlay.yaml
-Xcc -ivfsoverlay${CMAKE_CURRENT_BINARY_DIR}/../LibGfx/vfs_overlay.yaml
-Xcc -ivfsoverlay${Lagom_BINARY_DIR}/AK/vfs_overlay.yaml
)
# Reuse LibWeb's include directories as search paths for the header-generation step below.
get_target_property(LIBWEB_NATIVE_DIRS LibWeb INCLUDE_DIRECTORIES)
# NOTE(review): _swift_generate_cxx_header is defined elsewhere; presumably it emits "LibWeb-Swift.h",
# the C++ header exposing the Swift module's public API - confirm against the helper's definition.
_swift_generate_cxx_header(LibWeb "LibWeb-Swift.h"
SEARCH_PATHS ${LIBWEB_NATIVE_DIRS}
COMPILE_OPTIONS ${VFS_OVERLAY_OPTIONS}
)

# FIXME: https://gitlab.kitware.com/cmake/cmake/-/issues/26175
# Apple-only workaround: after linking, rewrite the dylib's install name (id) to the @rpath-relative name.
if (APPLE)
add_custom_command(TARGET LibWeb POST_BUILD
COMMAND install_name_tool -id @rpath/liblagom-web.0.dylib "$<TARGET_FILE:LibWeb>"
)
endif()
endif()
78 changes: 78 additions & 0 deletions Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2024, Andrew Kaster <[email protected]>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

/// A single token of the HTML token stream.
///
/// The token's kind and payload are stored in `type`; `startPosition` and `endPosition`
/// hold the source positions associated with the token.
public class HTMLToken {
    /// A location in the input, expressed as line, column and byte offset.
    public struct Position {
        public var line = UInt()
        public var column = UInt()
        public var byteOffset = UInt()

        /// Creates a position. Every component defaults to zero.
        ///
        /// Declared explicitly because the synthesized memberwise initializer of a public struct
        /// is only `internal`; labels and defaults match the synthesized one.
        public init(line: UInt = UInt(), column: UInt = UInt(), byteOffset: UInt = UInt()) {
            self.line = line
            self.column = column
            self.byteOffset = byteOffset
        }
    }

    /// One attribute of a start or end tag, together with the positions of its name and value.
    public struct Attribute {
        public var prefix: String?
        public var localName: String
        public var namespace_: String?
        public var value: String
        public var nameStartPosition: Position
        public var nameEndPosition: Position
        public var valueStartPosition: Position
        public var valueEndPosition: Position

        /// Creates an attribute.
        ///
        /// Labels and order match the synthesized memberwise initializer, so existing in-module
        /// call sites keep compiling; the position parameters additionally default to `Position()`.
        public init(
            prefix: String? = nil,
            localName: String,
            namespace_: String? = nil,
            value: String,
            nameStartPosition: Position = Position(),
            nameEndPosition: Position = Position(),
            valueStartPosition: Position = Position(),
            valueEndPosition: Position = Position()
        ) {
            self.prefix = prefix
            self.localName = localName
            self.namespace_ = namespace_
            self.value = value
            self.nameStartPosition = nameStartPosition
            self.nameEndPosition = nameEndPosition
            self.valueStartPosition = valueStartPosition
            self.valueEndPosition = valueEndPosition
        }
    }

    /// The kind of a token together with the data specific to that kind.
    public enum TokenType {
        case Invalid
        case DOCTYPE(
            name: String?,
            publicIdentifier: String?,
            systemIdentifier: String?,
            forceQuirksMode: Bool)
        case StartTag(
            tagName: String,
            selfClosing: Bool,
            selfClosingAcknowledged: Bool,
            attributes: [Attribute])
        case EndTag(
            tagName: String,
            selfClosing: Bool,
            selfClosingAcknowledged: Bool,
            attributes: [Attribute])
        case Comment(data: String)
        case Character(codePoint: Character)
        case EndOfFile
    }

    /// Returns `true` when this token's type is `.Character`.
    public func isCharacter() -> Bool {
        guard case .Character = self.type else {
            return false
        }
        return true
    }

    /// Returns `true` when this character token holds tab, line feed, form feed, carriage return or space.
    ///
    /// - Precondition: The token is a character token; calling this on any other token traps.
    public func isParserWhitespace() -> Bool {
        guard case .Character(let codePoint) = self.type else {
            preconditionFailure("isParserWhitespace() called on non-character token")
        }

        // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
        // NOTE: A Swift `Character` is a grapheme cluster, so "\r\n" is a single `Character`
        //       that matches none of the cases below.
        switch codePoint {
        case "\t", "\n", "\u{000C}" /* \f */, "\r", " ":
            return true
        default:
            return false
        }
    }

    /// Kind and payload of the token; `.Invalid` until assigned.
    public var type = TokenType.Invalid
    /// Start position of the token; all zeros by default.
    public var startPosition = Position()
    /// End position of the token; all zeros by default.
    public var endPosition = Position()

    /// Creates an `.Invalid` token with zeroed positions.
    public init() {}

    /// Creates a token of the given type with zeroed positions.
    public init(type: TokenType) {
        self.type = type
    }
}

0 comments on commit fb074f9

Please sign in to comment.