Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into fcitx
Browse files Browse the repository at this point in the history
  • Loading branch information
Fcitx Bot committed Oct 15, 2024
2 parents 0cbb8f1 + 7be5a73 commit b79099a
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 30 deletions.
8 changes: 8 additions & 0 deletions src/base/file_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,18 @@ absl::Status FileUtil::CreateDirectory(const std::string &path) {
}

absl::Status FileUtilImpl::CreateDirectory(const std::string &path) const {
#if !defined(_WIN32)
// On Windows, this existence check is skipped to avoid freezing the host
// application. This platform-dependent behavior is a temporary workaround.
// https://github.com/google/mozc/issues/1076
//
// If the path already exists, returns OkStatus and does nothing.
if (const absl::Status status = DirectoryExists(path); status.ok()) {
return absl::OkStatus();
}
#endif // !_WIN32

#if defined(_WIN32)
const std::wstring wide = win32::Utf8ToWide(path);
if (wide.empty()) {
Expand Down
8 changes: 7 additions & 1 deletion src/base/file_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,13 @@ class FileUtil {
~FileUtil() = delete;

// Creates a directory. Does not create directories in the way to the path.
// If the directory already exists, returns OkStatus and does nothing.
// If the directory already exists:
// - On Windows: returns an error status.
// - Others: returns OkStatus and does nothing.
//
// The platform-dependent behavior above is a temporary workaround to avoid
// freezing the host application.
// https://github.com/google/mozc/issues/1076
static absl::Status CreateDirectory(const std::string &path);

// Removes an empty directory.
Expand Down
6 changes: 6 additions & 0 deletions src/base/file_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,14 @@ TEST(FileUtilTest, CreateDirectory) {
EXPECT_OK(FileUtil::CreateDirectory(dirpath));
EXPECT_OK(FileUtil::DirectoryExists(dirpath));

#if !defined(_WIN32)
// On Windows, CreateDirectory does not return OK if the directory already
// exists. See the implementation of CreateDirectory in file_util.cc.
// https://github.com/google/mozc/issues/1076
//
// Create the same directory again.
EXPECT_OK(FileUtil::CreateDirectory(dirpath));
#endif // !_WIN32

// Delete the directory.
ASSERT_OK(FileUtil::RemoveDirectory(dirpath));
Expand Down
49 changes: 26 additions & 23 deletions src/converter/immutable_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@
#include <cstdint>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
Expand Down Expand Up @@ -297,8 +297,9 @@ std::vector<absl::string_view> GetBoundaryInfo(const Segment::Candidate &c) {
// Here, "渡しの" will be filtered if there is a cost gap from "私の"
class FirstInnerSegmentCandidateChecker {
public:
explicit FirstInnerSegmentCandidateChecker(const Segment &target_segment)
: target_segment_(target_segment) {}
explicit FirstInnerSegmentCandidateChecker(const Segment &target_segment,
int cost_max_diff)
: target_segment_(target_segment), cost_max_diff_(cost_max_diff) {}

bool IsGoodCandidate(const Segment::Candidate &c) {
if (c.key.size() != target_segment_.key().size() &&
Expand All @@ -308,15 +309,7 @@ class FirstInnerSegmentCandidateChecker {
return false;
}

if (!Util::ContainsScriptType(c.value, Util::KANJI)) {
// Do not filter non-kanji candidate.
// It may have unusual candidate cost.
return true;
}

constexpr int kCostDiff = 3107; // 500*log(500)
if (const auto &f = min_cost_for_key_.find(c.key);
f != min_cost_for_key_.end() && c.cost - f->second > kCostDiff) {
if (min_cost_.has_value() && c.cost - *min_cost_ > cost_max_diff_) {
return false;
}

Expand All @@ -329,9 +322,10 @@ class FirstInnerSegmentCandidateChecker {

if (Util::ContainsScriptType(c.value, Util::KANJI)) {
// Do not use non-kanji entry's cost. Sometimes it is too small.
auto [it, inserted] = min_cost_for_key_.try_emplace(c.key, c.cost);
if (!inserted) {
it->second = std::min(it->second, c.cost);
if (!min_cost_.has_value()) {
min_cost_ = c.cost;
} else {
*min_cost_ = std::min(*min_cost_, c.cost);
}
}
}
Expand All @@ -345,7 +339,8 @@ class FirstInnerSegmentCandidateChecker {
}

const Segment &target_segment_;
absl::flat_hash_map<std::string, int> min_cost_for_key_;
int cost_max_diff_;
std::optional<int> min_cost_;
Trie<bool> trie_;
};

Expand Down Expand Up @@ -2112,37 +2107,43 @@ void ImmutableConverter::InsertCandidatesForRealtime(
void ImmutableConverter::InsertCandidatesForRealtimeWithCandidateChecker(
const ConversionRequest &request, const Lattice &lattice,
absl::Span<const uint16_t> group, Segments *segments) const {
const commands::DecoderExperimentParams params =
request.request().decoder_experiment_params();
Segment *target_segment = segments->mutable_conversion_segment(0);
absl::flat_hash_set<std::string> added;

Segments tmp_segments = *segments;
{
// Candidates for the whole path
constexpr int kMaxSize = 3;
InsertCandidates(request, &tmp_segments, lattice, group, kMaxSize,
SINGLE_SEGMENT);

// InsertCandidates for SINGLE_SEGMENT should insert at least one candidate.
// At least one candidate should be added.
// Skip adding similar candidates unless some char coverage still
// remains.
DCHECK_GT(tmp_segments.conversion_segment(0).candidates_size(), 0);
const auto &top_cand = tmp_segments.conversion_segment(0).candidate(0);
const std::vector<absl::string_view> top_boundary =
GetBoundaryInfo(top_cand);
int remaining_char_coverage =
params.realtime_conversion_single_segment_char_coverage();
for (int i = 0; i < tmp_segments.conversion_segment(0).candidates_size();
++i) {
const auto &c = tmp_segments.conversion_segment(0).candidate(i);
constexpr int kCostDiff = 2302; // 500*log(100)
if (c.cost - top_cand.cost > kCostDiff) {
continue;
}
const std::vector<absl::string_view> boundary = GetBoundaryInfo(c);
if (boundary.size() > 2 && i != 0 && boundary == top_boundary) {
// Skip to add the similar candidates excepting the case that the
// top candidate has the simple structure (i.e., "のXX", etc)
if (i != 0 && GetBoundaryInfo(c) == top_boundary &&
remaining_char_coverage < 0) {
// Skip candidates that share the top candidate's boundary once the
// remaining char coverage drops below zero.
continue;
}
Segment::Candidate *candidate = target_segment->add_candidate();
*candidate = c;
added.insert(c.value);
remaining_char_coverage -= Util::CharsLen(c.value);
}
}
tmp_segments.mutable_conversion_segment(0)->clear_candidates();
Expand All @@ -2153,7 +2154,9 @@ void ImmutableConverter::InsertCandidatesForRealtimeWithCandidateChecker(
request.max_conversion_candidates_size() -
target_segment->candidates_size(),
FIRST_INNER_SEGMENT);
FirstInnerSegmentCandidateChecker checker(*target_segment);
FirstInnerSegmentCandidateChecker checker(
*target_segment,
params.realtime_conversion_candidate_checker_cost_max_diff());
for (int i = 0; i < tmp_segments.conversion_segment(0).candidates_size();
++i) {
Segment::Candidate *c =
Expand Down
85 changes: 83 additions & 2 deletions src/converter/immutable_converter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
Expand Down Expand Up @@ -531,8 +530,89 @@ TEST(ImmutableConverterTest, FirstInnerSegmentFiltering) {
EXPECT_TRUE(data_and_converter->GetConverter()->ConvertForRequest(
conversion_request, &segments));

EXPECT_THAT(*segment, ContainsCandidate(ValueIs("したとき")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("した時")));
// The same segment structure
EXPECT_THAT(*segment, Not(ContainsCandidate(ValueIs("したとき"))));
}
{
Segments segments;
Segment *segment = segments.add_segment();
segment->set_key("のとき");
EXPECT_TRUE(data_and_converter->GetConverter()->ConvertForRequest(
conversion_request, &segments));

EXPECT_THAT(*segment, ContainsCandidate(ValueIs("の時")));
// The same segment structure
EXPECT_THAT(*segment, Not(ContainsCandidate(ValueIs("のとき"))));
}
{
Segments segments;
Segment *segment = segments.add_segment();
segment->set_key("かえる");
EXPECT_TRUE(data_and_converter->GetConverter()->ConvertForRequest(
conversion_request, &segments));

EXPECT_THAT(*segment, ContainsCandidate(ValueIs("換える")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("代える")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("買える")));
// Filtered by cost diff
EXPECT_THAT(*segment, Not(ContainsCandidate(ValueIs("飼える"))));
}
{
Segments segments;
Segment *segment = segments.add_segment();
segment->set_key("くるまでこうどうした");
EXPECT_TRUE(data_and_converter->GetConverter()->ConvertForRequest(
conversion_request, &segments));

EXPECT_THAT(*segment, ContainsCandidate(ValueIs("車で行動した")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("車で")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("来るまで")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("くるまで")));
}
}

TEST(ImmutableConverterTest, FirstInnerSegmentFilteringParams) {
commands::Request request;
request_test_util::FillMobileRequest(&request);
request.mutable_decoder_experiment_params()
->set_enable_realtime_conversion_candidate_checker(true);
request.mutable_decoder_experiment_params()
->set_realtime_conversion_single_segment_char_coverage(2);
request.mutable_decoder_experiment_params()
->set_realtime_conversion_candidate_checker_cost_max_diff(
4605); // 500*log(10000);
ConversionRequest conversion_request;
conversion_request.set_request_type(ConversionRequest::PREDICTION);
conversion_request.set_request(&request);
conversion_request.set_create_partial_candidates(true);
conversion_request.set_max_conversion_candidates_size(100);

auto data_and_converter = std::make_unique<MockDataAndImmutableConverter>();
constexpr auto ValueIs = [](const auto &value) {
return Field(&Segment::Candidate::value, StrEq(value));
};

{
Segments segments;
Segment *segment = segments.add_segment();
segment->set_key("したとき");
EXPECT_TRUE(data_and_converter->GetConverter()->ConvertForRequest(
conversion_request, &segments));

EXPECT_THAT(*segment, ContainsCandidate(ValueIs("した時")));
// Not enough char coverage
EXPECT_THAT(*segment, Not(ContainsCandidate(ValueIs("したとき"))));
}
{
Segments segments;
Segment *segment = segments.add_segment();
segment->set_key("のとき");
EXPECT_TRUE(data_and_converter->GetConverter()->ConvertForRequest(
conversion_request, &segments));

EXPECT_THAT(*segment, ContainsCandidate(ValueIs("の時")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("のとき")));
}
{
Segments segments;
Expand All @@ -544,6 +624,7 @@ TEST(ImmutableConverterTest, FirstInnerSegmentFiltering) {
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("換える")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("代える")));
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("買える")));
// cost diff < cost_max_diff
EXPECT_THAT(*segment, ContainsCandidate(ValueIs("飼える")));
}
{
Expand Down
24 changes: 23 additions & 1 deletion src/protocol/commands.proto
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ message Capability {
[default = NO_TEXT_DELETION_CAPABILITY];
}

// Next ID: 98
// Next ID: 102
// Bundles together some Android experiment flags so that they can be easily
// retrieved throughout the native code. These flags are generally specific to
// the decoder, and are made available when the decoder is initialized.
Expand All @@ -597,6 +597,15 @@ message DecoderExperimentParams {
// If zero, the previous implementation is used.
optional int32 typing_correction_result_reranker_mode = 95 [default = 0];

// Promote TC/Literal if w * tc_reranker_score + (1.0 - w) * lm_diff > 0.0.
optional float typing_correction_result_reranker_interpolation_weight = 100
[default = 0.0];

// Runs literal-at-least-second when
// original_tc_score + w * tc_reranker_score > 0.0; otherwise, runs
// literal-on-top.
optional float typing_correction_result_reranker_literal_on_top_weight = 101
[default = 0.0];

// Uses the typing correction in user history predictor. The size specifies
// the maximum number of typing corrections used for query lookup.
// When zero, typing_correction is not used.
Expand Down Expand Up @@ -659,6 +668,19 @@ message DecoderExperimentParams {
// <= katakana_override_min_per_char_cost
optional int32 katakana_override_min_per_char_cost = 91 [default = 0];

// Parameters for realtime conversion
// (go/mozc-src/converter/immutable_converter.cc for details)
//
// - Character coverage for single segment
// While the total character length of the already-included candidates'
// values does not exceed this value, a candidate is not filtered even when
// its boundary is the same as the top candidate's.
optional int32 realtime_conversion_single_segment_char_coverage = 98
[default = 0];
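// - Maximum cost difference for the first inner segment
// A candidate whose cost exceeds the tracked minimum candidate cost by more
// than this value is filtered.
// Default value: 3107 (500*log(500)).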
optional int32 realtime_conversion_candidate_checker_cost_max_diff = 99
[default = 3107];
}

// Clients' request to the server.
Expand Down
3 changes: 0 additions & 3 deletions src/win32/tip/tip_text_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ constexpr GUID kTipFunctionProvider = {
#else // GOOGLE_JAPANESE_INPUT_BUILD

constexpr char kHelpUrl[] = "https://github.com/google/mozc";
constexpr char kLogFileName[] = "Mozc_tsf_ui.log";
constexpr wchar_t kTaskWindowClassName[] =
L"Mozc Immersive Task Message Window";

Expand Down Expand Up @@ -511,8 +510,6 @@ class TipTextServiceImpl
StorePointerForCurrentThread(this);

HRESULT result = E_UNEXPECTED;
RegisterLogFileSink(
FileUtil::JoinPath(SystemUtil::GetLoggingDirectory(), kLogFileName));

EnsureKanaLockUnlocked();

Expand Down

0 comments on commit b79099a

Please sign in to comment.