Skip to content

Commit

Permalink
Change YamlConfiguration encoding styles.
Browse files Browse the repository at this point in the history
On JVMs with UTF-8 default encoding, this commit has no change in behavior.

On JVMs with ascii default encoding (like some minimal linux installa-
tions), this commit now uses UTF-8 for YamlConfiguration operations.
Because all ascii is valid UTF-8, there is no feature degradation or data
loss during the transition.

On JVMs with any non-unicode but ascii-compliant encoding, this commit now
forces YamlConfiguration to escape special characters when writing to
files, effectively rendering the encoding to be plain ascii. Any affected
file will now be able to migrate to UTF-8 in the future without data-loss
or explicit conversion. When reading files, YamlConfiguration will use the
system default encoding to handle any incoming non-utf8 data, with the
expectation that any newly written file is still compliant with the
system's default encoding.

On JVMs with any non-unicode, but ascii-incompliant encoding (this may be
the case for some Eastern character sets on Windows systems), this change
is breaking, but is justified by the claim that these systems would otherwise
be unable to read YamlConfiguration for implementation dependent settings
or from plugins themselves. For these systems, all uses of the encoding
will be forced to use UTF-8 in all cases, and is effectively treated as if
it was configured to be UTF-8 by default.

On JVMs with unicode encoding of UTF-16 or UTF-32, the ability to load any
configurations from almost any source prior to this change would have been
unfeasible, if not impossible. As of this change, however, these systems
now behave as expected when writing or reading files. However, when
reading from any plugin jar, UTF-8 will be used, matching a super-majority
of plugin developer base and requirements for the plugin.yml.

Plugin developers may now mark their plugin as UTF-8 compliant, as
documented in the PluginDescriptionFile class. This change will cause the
appropriate APIs in JavaPlugin to ignore any system default encoding,
instead using a Reader with the UTF-8 encoding, effectively rendering the
jar system independent. This does not affect the aforementioned JVM
settings for reading and writing files.

To coincide with these changes, YamlConfiguration methods that utilize a
stream are now deprecated to encourage use of a more strict denotation.
File methods carry system-specific behaviors to prevent unnecessary data
loss during the transitional phase, while Reader methods are now provided
that have a very well-defined encoder behavior. For the transition from
InputStream methods to Reader methods, an API has been added to JavaPlugin
to provide a Reader that matches the previous behavior as well as
compliance to the UTF-8 flag in the PluginDescriptionFile.

Addresses BUKKIT-314, BUKKIT-1466, BUKKIT-3377
  • Loading branch information
Wolvereness committed May 14, 2014
1 parent 8291081 commit 24883a6
Show file tree
Hide file tree
Showing 5 changed files with 326 additions and 14 deletions.
89 changes: 83 additions & 6 deletions src/main/java/org/bukkit/configuration/file/FileConfiguration.java
Original file line number Diff line number Diff line change
@@ -1,25 +1,68 @@
package org.bukkit.configuration.file;

import com.google.common.base.Charsets;
import com.google.common.io.Files;

import org.apache.commons.lang.Validate;
import org.bukkit.configuration.InvalidConfigurationException;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;

import org.bukkit.configuration.Configuration;
import org.bukkit.configuration.MemoryConfiguration;
import org.yaml.snakeyaml.external.biz.base64Coder.Base64Coder;

/**
* This is a base class for all File based implementations of {@link
* Configuration}
*/
public abstract class FileConfiguration extends MemoryConfiguration {
/**
 * Indicates that the system default encoding should be ignored entirely:
 * either it fails to decode a sample of plain ASCII bytes correctly, or it
 * is plain US-ASCII, which is already a strict subset of UTF-8.
 *
 * @deprecated temporary compatibility measure
 */
@Deprecated
public static final boolean UTF8_OVERRIDE;
/**
 * Indicates that the system default encoding is a unicode encoding (its
 * name contains "UTF") that nevertheless cannot parse standard ASCII.
 *
 * @deprecated temporary compatibility measure
 */
@Deprecated
public static final boolean UTF_BIG;
/**
 * Indicates that the system is treated as supporting unicode, either
 * because the default encoding is a UTF variant or because it is being
 * overridden.
 *
 * @deprecated temporary compatibility measure
 */
@Deprecated
public static final boolean SYSTEM_UTF;
static {
    final Charset systemCharset = Charset.defaultCharset();

    // Decode a fixed ASCII byte sample with the system charset and compare
    // it with the text those bytes are expected to represent.
    final String expectedText = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\r\n";
    final byte[] sampleBytes = Base64Coder.decode("ICEiIyQlJicoKSorLC0uLzAxMjM0NTY3ODk6Ozw9Pj9AQUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2BhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ent8fX4NCg==");
    final String decodedText = new String(sampleBytes, systemCharset);

    final boolean asciiMangled = !expectedText.equals(decodedText);
    final boolean namedUtf = systemCharset.name().contains("UTF");

    UTF8_OVERRIDE = asciiMangled || systemCharset.equals(Charset.forName("US-ASCII"));
    SYSTEM_UTF = namedUtf || UTF8_OVERRIDE;
    UTF_BIG = namedUtf && UTF8_OVERRIDE;
}

/**
* Creates an empty {@link FileConfiguration} with no default values.
*/
Expand All @@ -43,6 +86,9 @@ public FileConfiguration(Configuration defaults) {
* If the file does not exist, it will be created. If already exists, it
* will be overwritten. If it cannot be overwritten or created, an
* exception will be thrown.
* <p>
* This method will save using the system default encoding, or possibly
* using UTF8.
*
* @param file File to save to.
* @throws IOException Thrown when the given file cannot be written to for
Expand All @@ -56,7 +102,7 @@ public void save(File file) throws IOException {

String data = saveToString();

FileWriter writer = new FileWriter(file);
Writer writer = new OutputStreamWriter(new FileOutputStream(file), UTF8_OVERRIDE && !UTF_BIG ? Charsets.UTF_8 : Charset.defaultCharset());

try {
writer.write(data);
Expand All @@ -71,6 +117,9 @@ public void save(File file) throws IOException {
* If the file does not exist, it will be created. If already exists, it
* will be overwritten. If it cannot be overwritten or created, an
* exception will be thrown.
* <p>
* This method will save using the system default encoding, or possibly
* using UTF8.
*
* @param file File to save to.
* @throws IOException Thrown when the given file cannot be written to for
Expand Down Expand Up @@ -99,6 +148,10 @@ public void save(String file) throws IOException {
* <p>
* If the file cannot be loaded for any reason, an exception will be
* thrown.
* <p>
* This will attempt to use the {@link Charset#defaultCharset()} for
* files, unless {@link #UTF8_OVERRIDE} but not {@link #UTF_BIG} is
* specified.
*
* @param file File to load from.
* @throws FileNotFoundException Thrown when the given file cannot be
Expand All @@ -111,7 +164,9 @@ public void save(String file) throws IOException {
public void load(File file) throws FileNotFoundException, IOException, InvalidConfigurationException {
    Validate.notNull(file, "File cannot be null");

    // Files are read as UTF-8 only when the default charset is being
    // overridden and is not itself a "big" unicode encoding; otherwise the
    // system default charset is used.
    final Charset charset = UTF8_OVERRIDE && !UTF_BIG ? Charsets.UTF_8 : Charset.defaultCharset();

    // The file is opened and loaded exactly once, through the Reader-based
    // overload, so the charset chosen above is the one actually applied.
    // NOTE(review): the stream is not closed here; presumably load(Reader)
    // closes the reader it is given - confirm.
    final FileInputStream stream = new FileInputStream(file);

    load(new InputStreamReader(stream, charset));
}

/**
Expand All @@ -120,20 +175,42 @@ public void load(File file) throws FileNotFoundException, IOException, InvalidCo
* All the values contained within this configuration will be removed,
* leaving only settings and defaults, and the new values will be loaded
* from the given stream.
* <p>
* This will attempt to use the {@link Charset#defaultCharset()}, unless
* {@link #UTF8_OVERRIDE} or {@link #UTF_BIG} is specified.
*
* @param stream Stream to load from
* @throws IOException Thrown when the given file cannot be read.
* @throws InvalidConfigurationException Thrown when the given file is not
* a valid Configuration.
* @throws IllegalArgumentException Thrown when stream is null.
* @deprecated This does not consider encoding
* @see #load(Reader)
*/
@Deprecated
public void load(InputStream stream) throws IOException, InvalidConfigurationException {
    Validate.notNull(stream, "Stream cannot be null");

    // Streams are decoded as UTF-8 whenever the default charset is being
    // overridden; otherwise the system default charset is used. All parsing
    // is delegated to the Reader-based overload, so no local reader or
    // buffer is created here.
    load(new InputStreamReader(stream, UTF8_OVERRIDE ? Charsets.UTF_8 : Charset.defaultCharset()));
}

/**
* Loads this {@link FileConfiguration} from the specified reader.
* <p>
* All the values contained within this configuration will be removed,
* leaving only settings and defaults, and the new values will be loaded
* from the given stream.
*
* @param reader the reader to load from
* @throws IOException thrown when underlying reader throws an IOException
* @throws InvalidConfigurationException thrown when the reader does not
* represent a valid Configuration
* @throws IllegalArgumentException thrown when reader is null
*/
public void load(Reader reader) throws IOException, InvalidConfigurationException {
BufferedReader input = reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);

StringBuilder builder = new StringBuilder();

try {
String line;
Expand Down
36 changes: 36 additions & 0 deletions src/main/java/org/bukkit/configuration/file/YamlConfiguration.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Map;
import java.util.logging.Level;

Expand Down Expand Up @@ -32,6 +33,7 @@ public class YamlConfiguration extends FileConfiguration {
public String saveToString() {
yamlOptions.setIndent(options().indent());
yamlOptions.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
yamlOptions.setAllowUnicode(SYSTEM_UTF);
yamlRepresenter.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);

String header = buildHeader();
Expand Down Expand Up @@ -162,6 +164,8 @@ public YamlConfigurationOptions options() {
* Any errors loading the Configuration will be logged and then ignored.
* If the specified input is not a valid config, a blank config will be
* returned.
* <p>
* The encoding used may follow the system dependent default.
*
* @param file Input file
* @return Resulting configuration
Expand Down Expand Up @@ -194,7 +198,11 @@ public static YamlConfiguration loadConfiguration(File file) {
* @param stream Input stream
* @return Resulting configuration
* @throws IllegalArgumentException Thrown if stream is null
* @deprecated does not properly consider encoding
* @see #load(InputStream)
* @see #loadConfiguration(Reader)
*/
@Deprecated
public static YamlConfiguration loadConfiguration(InputStream stream) {
Validate.notNull(stream, "Stream cannot be null");

Expand All @@ -210,4 +218,32 @@ public static YamlConfiguration loadConfiguration(InputStream stream) {

return config;
}


/**
 * Creates a new {@link YamlConfiguration}, loading from the given reader.
 * <p>
 * Any errors loading the Configuration will be logged and then ignored.
 * If the specified input is not a valid config, a blank config will be
 * returned.
 *
 * @param reader input
 * @return resulting configuration
 * @throws IllegalArgumentException Thrown if reader is null
 */
public static YamlConfiguration loadConfiguration(Reader reader) {
    Validate.notNull(reader, "Reader cannot be null");

    YamlConfiguration config = new YamlConfiguration();

    try {
        config.load(reader);
    } catch (IOException ex) {
        // Best effort by contract: log the failure and return what we have.
        Bukkit.getLogger().log(Level.SEVERE, "Cannot load configuration from stream", ex);
    } catch (InvalidConfigurationException ex) {
        Bukkit.getLogger().log(Level.SEVERE, "Cannot load configuration from stream", ex);
    }

    return config;
}
}
29 changes: 29 additions & 0 deletions src/main/java/org/bukkit/plugin/PluginAwareness.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package org.bukkit.plugin;

import java.util.Set;

import org.bukkit.plugin.java.JavaPlugin;

/**
 * Marks a concept that a plugin declares itself to be aware of.
 * <p>
 * An implementation may be a singleton or a parameterized instance, but in
 * either case it must be immutable.
 */
public interface PluginAwareness {
    /**
     * The individual awareness flags a plugin can declare. Whether a plugin
     * has one can be checked with {@link
     * PluginDescriptionFile#getAwareness()}.{@link Set#contains(Object)
     * contains(flag)}.
     */
    enum Flags implements PluginAwareness {
        /**
         * Declares that every (text) resource stored in the plugin's jar is
         * encoded as UTF-8.
         *
         * @see JavaPlugin#getTextResource(String)
         */
        UTF8
    }
}
Loading

0 comments on commit 24883a6

Please sign in to comment.