Merge commit for internal changes
Patrick Nguyen committed Jun 6, 2018
2 parents a3ef451 + 5c26ec2 commit b458040
Showing 94 changed files with 4,418 additions and 1,899 deletions.
24 changes: 4 additions & 20 deletions WORKSPACE
@@ -22,26 +22,10 @@ check_bazel_version_at_least("0.10.0")

load("//tensorflow:workspace.bzl", "tf_workspace")

# Uncomment and update the paths in these entries to build the Android demo.
#android_sdk_repository(
# name = "androidsdk",
# api_level = 23,
# # Ensure that you have the build_tools_version below installed in the
# # SDK manager as it updates periodically.
# build_tools_version = "26.0.1",
# # Replace with path to Android SDK on your system
# path = "<PATH_TO_SDK>",
#)
#
#android_ndk_repository(
# name="androidndk",
# path="<PATH_TO_NDK>",
# # This needs to be 14 or higher to compile TensorFlow.
# # Please specify API level >= 21 to build for 64-bit architecture
# # otherwise the Android NDK will automatically select the latest
# # API level it does support without notice.
# # Note that the NDK version is not the API level.
# api_level=14)
load("//third_party/android:android_configure.bzl", "android_configure")
android_configure(name="local_config_android")
load("@local_config_android//:android.bzl", "android_workspace")
android_workspace()

# Please add all new TensorFlow dependencies in workspace.bzl.
tf_workspace()
94 changes: 29 additions & 65 deletions configure.py
@@ -670,8 +670,9 @@ def valid_ndk_path(path):
error_msg=('The path %s or its child file "source.properties" '
'does not exist.')
)

write_android_ndk_workspace_rule(android_ndk_home_path)
write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path)
write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL',
check_ndk_level(android_ndk_home_path))


def create_android_sdk_rule(environ_cp):
@@ -733,41 +734,12 @@ def valid_build_tools(version):
error_msg=('The selected SDK does not have build-tools version %s '
'available.'))

write_android_sdk_workspace_rule(android_sdk_home_path,
android_build_tools_version,
android_api_level)


def write_android_sdk_workspace_rule(android_sdk_home_path,
android_build_tools_version,
android_api_level):
print('Writing android_sdk_workspace rule.\n')
with open(_TF_WORKSPACE, 'a') as f:
f.write("""
android_sdk_repository(
name="androidsdk",
api_level=%s,
path="%s",
build_tools_version="%s")\n
""" % (android_api_level, android_sdk_home_path, android_build_tools_version))


def write_android_ndk_workspace_rule(android_ndk_home_path):
print('Writing android_ndk_workspace rule.')
ndk_api_level = check_ndk_level(android_ndk_home_path)
if int(ndk_api_level) not in _SUPPORTED_ANDROID_NDK_VERSIONS:
print('WARNING: The API level of the NDK in %s is %s, which is not '
'supported by Bazel (officially supported versions: %s). Please use '
'another version. Compiling Android targets may result in confusing '
'errors.\n' % (android_ndk_home_path, ndk_api_level,
_SUPPORTED_ANDROID_NDK_VERSIONS))
with open(_TF_WORKSPACE, 'a') as f:
f.write("""
android_ndk_repository(
name="androidndk",
path="%s",
api_level=%s)\n
""" % (android_ndk_home_path, ndk_api_level))
write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION',
android_build_tools_version)
write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL',
android_api_level)
write_action_env_to_bazelrc('ANDROID_SDK_HOME',
android_sdk_home_path)


def check_ndk_level(android_ndk_home_path):
@@ -780,18 +752,16 @@ def check_ndk_level(android_ndk_home_path):

revision = re.search(r'Pkg.Revision = (\d+)', filedata)
if revision:
return revision.group(1)
return None


def workspace_has_any_android_rule():
"""Check the WORKSPACE for existing android_*_repository rules."""
with open(_TF_WORKSPACE, 'r') as f:
workspace = f.read()
has_any_rule = re.search(r'^android_[ns]dk_repository',
workspace,
re.MULTILINE)
return has_any_rule
ndk_api_level = revision.group(1)
else:
raise Exception('Unable to parse NDK revision.')
if int(ndk_api_level) not in _SUPPORTED_ANDROID_NDK_VERSIONS:
print('WARNING: The API level of the NDK in %s is %s, which is not '
'supported by Bazel (officially supported versions: %s). Please use '
'another version. Compiling Android targets may result in confusing '
'errors.\n' % (android_ndk_home_path, ndk_api_level,
_SUPPORTED_ANDROID_NDK_VERSIONS))
return ndk_api_level


def set_gcc_host_compiler_path(environ_cp):
@@ -1223,7 +1193,7 @@ def set_tf_cuda_compute_capabilities(environ_cp):
# Check whether all capabilities from the input is valid
all_valid = True
# Remove all whitespace characters before splitting the string
# that users may insert by accident, as this will result in error
# that users may insert by accident, as this will result in error
tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split())
for compute_capability in tf_cuda_compute_capabilities.split(','):
m = re.match('[0-9]+.[0-9]+', compute_capability)
@@ -1556,21 +1526,15 @@ def main():
set_build_strip_flag()
set_windows_build_flags()

if workspace_has_any_android_rule():
print('The WORKSPACE file has at least one of ["android_sdk_repository", '
'"android_ndk_repository"] already set. Will not ask to help '
'configure the WORKSPACE. Please delete the existing rules to '
'activate the helper.\n')
else:
if get_var(
environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace',
False,
('Would you like to interactively configure ./WORKSPACE for '
'Android builds?'),
'Searching for NDK and SDK installations.',
'Not configuring the WORKSPACE for Android builds.'):
create_android_ndk_rule(environ_cp)
create_android_sdk_rule(environ_cp)
if get_var(
environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace',
False,
('Would you like to interactively configure ./WORKSPACE for '
'Android builds?'),
'Searching for NDK and SDK installations.',
'Not configuring the WORKSPACE for Android builds.'):
create_android_ndk_rule(environ_cp)
create_android_sdk_rule(environ_cp)

print('Preconfigured Bazel build configs. You can use any of the below by '
'adding "--config=<>" to your build command. See tools/bazel.rc for '
1 change: 0 additions & 1 deletion tensorflow/compiler/xla/BUILD
@@ -53,7 +53,6 @@ xla_proto_library(
deps = [
":xla_data_proto",
"//tensorflow/compiler/xla/service:hlo_proto",
"//tensorflow/compiler/xla/service:session_proto",
],
)

1 change: 1 addition & 0 deletions tensorflow/compiler/xla/client/BUILD
@@ -110,6 +110,7 @@ cc_library(
"//tensorflow/compiler/xla/service:compiler",
"//tensorflow/compiler/xla/service:device_memory_allocator",
"//tensorflow/compiler/xla/service:executable",
"//tensorflow/compiler/xla/service:hlo_proto",
"//tensorflow/compiler/xla/service:local_service",
"//tensorflow/compiler/xla/service:shaped_buffer",
"//tensorflow/compiler/xla/service:source_map_util",
22 changes: 11 additions & 11 deletions tensorflow/compiler/xla/client/local_client.cc
@@ -185,7 +185,7 @@ StatusOr<ScopedShapedBuffer> LocalExecutable::Run(
run_options, backend_->StreamBorrower(),
backend_->eigen_intra_op_thread_pool());

if (executable_->dumping()) {
if (executable_->dumping_snapshot()) {
return ExecuteAndDump(&service_options, arguments);
}
return executable_->ExecuteOnStreamWrapper(
@@ -195,36 +195,36 @@ StatusOr<ScopedShapedBuffer> LocalExecutable::Run(
StatusOr<ScopedShapedBuffer> LocalExecutable::ExecuteAndDump(
const ServiceExecutableRunOptions* run_options,
const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
executable_->session_module()->set_execution_platform(
executable_->hlo_snapshot()->set_execution_platform(
backend_->platform()->Name());
TF_RETURN_IF_ERROR(RecordArguments(arguments, executable_->session_module()));
TF_RETURN_IF_ERROR(RecordArguments(arguments, executable_->hlo_snapshot()));
TF_ASSIGN_OR_RETURN(
ScopedShapedBuffer result,
executable_->ExecuteOnStream(run_options, arguments,
/*hlo_execution_profile=*/nullptr));
TF_RETURN_IF_ERROR(RecordResult(&result, executable_->session_module()));
TF_RETURN_IF_ERROR(executable_->DumpSessionModule());
TF_RETURN_IF_ERROR(RecordResult(&result, executable_->hlo_snapshot()));
TF_RETURN_IF_ERROR(executable_->DumpHloSnapshot());
return std::move(result);
}

Status LocalExecutable::RecordArguments(
const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
SessionModule* session_module) {
session_module->clear_arguments();
HloSnapshot* hlo_snapshot) {
hlo_snapshot->clear_arguments();
for (const ShapedBuffer* argument : arguments) {
TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal,
LiteralFromShapedBuffer(*argument));
*session_module->add_arguments() = literal->ToProto();
*hlo_snapshot->add_arguments() = literal->ToProto();
}
return Status::OK();
}

Status LocalExecutable::RecordResult(const ShapedBuffer* result,
SessionModule* session_module) {
session_module->clear_result();
HloSnapshot* hlo_snapshot) {
hlo_snapshot->clear_result();
TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal,
LiteralFromShapedBuffer(*result));
*session_module->mutable_result() = literal->ToProto();
*hlo_snapshot->mutable_result() = literal->ToProto();
return Status::OK();
}

6 changes: 3 additions & 3 deletions tensorflow/compiler/xla/client/local_client.h
@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/compiler.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/executable.h"
#include "tensorflow/compiler/xla/service/hlo.pb.h"
#include "tensorflow/compiler/xla/service/local_service.h"
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/statusor.h"
@@ -78,11 +79,10 @@ class LocalExecutable {
// proto.
Status RecordArguments(
const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
SessionModule* session_module);
HloSnapshot* hlo_snapshot);

// Records the result of the computation in an HloSnapshot proto.
Status RecordResult(const ShapedBuffer* result,
SessionModule* session_module);
Status RecordResult(const ShapedBuffer* result, HloSnapshot* hlo_snapshot);

// Returns a literal containing the contents of the given ShapedBuffer.
StatusOr<std::unique_ptr<Literal>> LiteralFromShapedBuffer(
24 changes: 23 additions & 1 deletion tensorflow/compiler/xla/client/xla_client/xla_builder.cc
@@ -1613,13 +1613,35 @@ XlaOp XlaBuilder::BatchNormGrad(const XlaOp& operand, const XlaOp& scale,

XlaOp XlaBuilder::CrossReplicaSum(const XlaOp& operand) {
return NoteErrorOrReturn([&]() -> StatusOr<XlaOp> {
HloInstructionProto instr;
TF_ASSIGN_OR_RETURN(const Shape& shape, GetShape(operand));
const Shape& scalar_shape = ShapeUtil::MakeShape(shape.element_type(), {});
auto b = CreateSubBuilder("sum");
b->Add(b->Parameter(/*parameter_number=*/0, scalar_shape, "x"),
b->Parameter(/*parameter_number=*/1, scalar_shape, "y"));
TF_ASSIGN_OR_RETURN(auto computation, b->Build());
return CrossReplicaSum(operand, computation, /*replica_group_ids=*/{},
/*channel_id=*/tensorflow::gtl::nullopt);
});
}

XlaOp XlaBuilder::CrossReplicaSum(
const XlaOp& operand, const XlaComputation& computation,
tensorflow::gtl::ArraySlice<int64> replica_group_ids,
const tensorflow::gtl::optional<ChannelHandle>& channel_id) {
return NoteErrorOrReturn([&]() -> StatusOr<XlaOp> {
if (!replica_group_ids.empty() || channel_id.has_value()) {
return Unimplemented(
"replica_group_ids and channel_id and is not supported in AllReduce");
}

HloInstructionProto instr;
TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand));
TF_ASSIGN_OR_RETURN(
*instr.mutable_shape(),
ShapeInference::InferCrossReplicaSumShape({&operand_shape}));

AddCalledComputation(computation, &instr);

return AddInstruction(std::move(instr), HloOpcode::kCrossReplicaSum,
{operand});
});
23 changes: 23 additions & 0 deletions tensorflow/compiler/xla/client/xla_client/xla_builder.h
@@ -532,6 +532,29 @@ class XlaBuilder {
// supply one input to the sum and all replicas receive the resulting sum.
XlaOp CrossReplicaSum(const XlaOp& operand);

// Enqueues an operation that does an AllReduce of the operand across cores.
// Here AllReduce means doing a reduction on the input operand across cores and
// then broadcasting the reduction result to those cores. The reduction
// function is defined by `computation`, which should be a commutative
// computation on scalars, e.g., add, min, or max. The way that AllReduce is
// applied is configured by:
//
// - `replica_group_ids`: maps replica ids to subgroup ids. If empty, all
// replicas belong to one group. AllReduce will be applied within subgroups.
// For example, with 4 replicas, replica_group_ids={0,1,0,1} means replicas 0
// and 2 are in subgroup 0 and replicas 1 and 3 are in subgroup 1.
//
// - `channel_id`: for AllReduce nodes from different models, if they have the
// same channel_id, they will be 'AllReduce'd. If empty, AllReduce will not be
// applied across models.
//
// TODO(b/79737069): Rename this to AllReduce when it's ready to use.
XlaOp CrossReplicaSum(
const XlaOp& operand, const XlaComputation& computation,
tensorflow::gtl::ArraySlice<int64> replica_group_ids = {},
const tensorflow::gtl::optional<ChannelHandle>& channel_id =
tensorflow::gtl::nullopt);

// Enqueues an operation that scatters the `source` array to the selected
// indices of each window.
XlaOp SelectAndScatter(const XlaOp& operand, const XlaComputation& select,
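The doc comment above describes the new CrossReplicaSum overload. Below is a minimal usage sketch of that overload, assuming the member-style XlaBuilder API used elsewhere in this diff; the function and builder names, shapes, and ConsumeValueOrDie() unwrapping are illustrative assumptions, not part of the commit. Note that, per the Unimplemented check in xla_builder.cc above, a non-empty replica_group_ids or a set channel_id is still rejected at this point.

// Sketch only: sum a float vector across all replicas with the new overload.
#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h"
#include "tensorflow/compiler/xla/shape_util.h"

xla::XlaComputation MakeScalarAdd() {
  // Scalar add computation used as the AllReduce reduction function.
  xla::XlaBuilder sum_builder("add");
  auto x = sum_builder.Parameter(
      /*parameter_number=*/0, xla::ShapeUtil::MakeShape(xla::F32, {}), "x");
  auto y = sum_builder.Parameter(
      /*parameter_number=*/1, xla::ShapeUtil::MakeShape(xla::F32, {}), "y");
  sum_builder.Add(x, y);
  return sum_builder.Build().ConsumeValueOrDie();
}

xla::XlaOp BuildCrossReplicaSum(xla::XlaBuilder* builder) {
  xla::XlaOp operand = builder->Parameter(
      /*parameter_number=*/0, xla::ShapeUtil::MakeShape(xla::F32, {1024}), "p");
  // Empty replica_group_ids: all replicas form one group. Unset channel_id:
  // the reduction is not applied across models.
  return builder->CrossReplicaSum(operand, MakeScalarAdd(),
                                  /*replica_group_ids=*/{},
                                  /*channel_id=*/tensorflow::gtl::nullopt);
}

For the common case, the parameterless CrossReplicaSum(operand) shown in xla_builder.cc above builds this kind of scalar-add computation internally and forwards to the overload with an empty group list and no channel.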
10 changes: 0 additions & 10 deletions tensorflow/compiler/xla/service/BUILD
@@ -21,13 +21,6 @@ load(
"tf_proto_library_py",
)

xla_proto_library(
name = "session_proto",
srcs = ["session.proto"],
visibility = ["//visibility:public"],
deps = ["//tensorflow/compiler/xla:xla_data_proto"],
)

xla_proto_library(
name = "hlo_proto",
srcs = ["hlo.proto"],
@@ -608,7 +601,6 @@ cc_library(
":hlo_module_config",
":hlo_proto_util",
":platform_util",
":session_proto",
":source_map_util",
":transfer_manager",
":versioned_computation_handle",
@@ -766,7 +758,6 @@ cc_library(
":hlo_graph_dumper",
":hlo_proto",
":pool",
":session_proto",
":shaped_buffer",
":versioned_computation_handle",
"//tensorflow/compiler/xla:executable_run_options",
@@ -870,7 +861,6 @@ cc_library(
hdrs = ["channel_tracker.h"],
deps = [
":hlo",
":session_proto",
":versioned_computation_handle",
"//tensorflow/compiler/xla:status",
"//tensorflow/compiler/xla:status_macros",
@@ -211,6 +211,17 @@ TEST_F(BFloat16ConversionFoldingTest, DoNotFoldTuple) {

TEST_F(BFloat16ConversionFoldingTest, FoldCrossReplicaSumTupleOutput) {
auto builder = HloComputation::Builder(TestName());

auto module = CreateNewModule();
HloComputation::Builder sum_builder("add");
auto x = sum_builder.AddInstruction(HloInstruction::CreateParameter(
/*parameter_number=*/0, ShapeUtil::MakeShape(F32, {}), "x"));
auto y = sum_builder.AddInstruction(HloInstruction::CreateParameter(
/*parameter_number=*/1, ShapeUtil::MakeShape(F32, {}), "y"));
sum_builder.AddInstruction(HloInstruction::CreateBinary(
ShapeUtil::MakeShape(F32, {}), HloOpcode::kAdd, x, y));
HloComputation* sum = module->AddEmbeddedComputation(sum_builder.Build());

Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4});
Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4});

@@ -223,7 +234,8 @@ TEST_F(BFloat16ConversionFoldingTest, FoldCrossReplicaSumTupleOutput) {

HloInstruction* crs =
builder.AddInstruction(HloInstruction::CreateCrossReplicaSum(
ShapeUtil::MakeTupleShape({f32_shape, f32_shape}), {convert_a, b}));
ShapeUtil::MakeTupleShape({f32_shape, f32_shape}), {convert_a, b},
sum));
HloInstruction* gte_a = builder.AddInstruction(
HloInstruction::CreateGetTupleElement(f32_shape, crs, 0));
HloInstruction* gte_b = builder.AddInstruction(
@@ -233,7 +245,6 @@ TEST_F(BFloat16ConversionFoldingTest, FoldCrossReplicaSumTupleOutput) {
HloInstruction* tuple = builder.AddInstruction(
HloInstruction::CreateTuple({gte_a, convert_gte_b}));

auto module = CreateNewModule();
auto computation = module->AddEntryComputation(builder.Build());

EXPECT_TRUE(FoldConversions(module.get()));