Skip to content

Commit

Permalink
Merge commit for internal changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan Hseu committed Mar 14, 2017
2 parents 3549499 + f861a1e commit be6c40a
Show file tree
Hide file tree
Showing 202 changed files with 6,942 additions and 2,082 deletions.
6 changes: 4 additions & 2 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@ function bazel_clean_and_fetch() {
if ! is_windows; then
bazel clean --expunge
fi
bazel fetch "//tensorflow/... -//tensorflow/contrib/nccl/... \
-//tensorflow/examples/android/..."
if [ -z "$TF_BAZEL_TARGETS" ]; then
TF_BAZEL_TARGETS="//tensorflow/... -//tensorflow/contrib/nccl/... -//tensorflow/examples/android/..."
fi
bazel fetch "$TF_BAZEL_TARGETS"
}

function sed_hyphen_i() {
Expand Down
3 changes: 2 additions & 1 deletion tensorflow/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ filegroup(
"//tensorflow/contrib:all_files",
"//tensorflow/contrib/android:all_files",
"//tensorflow/contrib/bayesflow:all_files",
"//tensorflow/contrib/cloud:all_files",
"//tensorflow/contrib/cloud/kernels:all_files",
"//tensorflow/contrib/compiler:all_files",
"//tensorflow/contrib/copy_graph:all_files",
"//tensorflow/contrib/crf:all_files",
Expand Down Expand Up @@ -228,7 +230,6 @@ filegroup(
"//tensorflow/core/grappler/inputs:all_files",
"//tensorflow/core/grappler/optimizers:all_files",
"//tensorflow/core/kernels:all_files",
"//tensorflow/core/kernels/cloud:all_files",
"//tensorflow/core/kernels/hexagon:all_files",
"//tensorflow/core/ops/compat:all_files",
"//tensorflow/core/platform/cloud:all_files",
Expand Down
10 changes: 10 additions & 0 deletions tensorflow/cc/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,16 @@ tf_gen_op_wrappers_cc(
visibility = ["//tensorflow:internal"],
)

# Generates the C++ op-wrapper library for the "resource_variable_ops" op lib
# from //tensorflow/core. include_internal_ops = 1 also emits wrappers for
# internal ops; the target is visible only within TensorFlow itself.
tf_gen_op_wrappers_cc(
    name = "resource_variable_ops",
    include_internal_ops = 1,
    op_lib_names = [
        "resource_variable_ops",
    ],
    pkg = "//tensorflow/core",
    visibility = ["//tensorflow:internal"],
)

tf_gen_op_wrappers_cc(
name = "remote_fused_graph_ops",
op_lib_names = [
Expand Down
18 changes: 16 additions & 2 deletions tensorflow/compiler/tests/xla_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,11 @@ def test_scope(self):
yield


def Benchmark(tf_bench, builder_fn, use_xla_jit, device):
def Benchmark(tf_bench,
builder_fn,
use_xla_jit,
device,
separate_compiled_gradients=False):
"""Build a graph and run benchmarks against it, with or without XLA.
Args:
Expand All @@ -129,6 +133,14 @@ def Benchmark(tf_bench, builder_fn, use_xla_jit, device):
is a list of tensors to fetch as output.
use_xla_jit: If true compile with the XLA JIT, otherwise use regular TF.
device: The tensorflow device to run on, e.g. "cpu", "gpu".
separate_compiled_gradients: If true put each gradient subgraph into a
separate compilation scope. This gives fine-grained control over which
portions of the graph will be compiled as a single unit. Compiling
gradients separately may yield better performance for some graphs.
The scope is named based on the scope of the forward computation as well
as the name of the gradients. As a result, the gradients will be compiled
in a scope that is separate from both the forward computation, and from
other gradients.
"""

with ops.Graph().as_default():
Expand All @@ -137,7 +149,9 @@ def Benchmark(tf_bench, builder_fn, use_xla_jit, device):
with ops.device(device):
fetches = []
jit_scope = jit.experimental_jit_scope
with jit_scope(compile_ops=use_xla_jit):
with jit_scope(
compile_ops=use_xla_jit,
separate_compiled_gradients=separate_compiled_gradients):
name, fetches = builder_fn()

# We only want to benchmark the operations themselves, and not the data
Expand Down
10 changes: 9 additions & 1 deletion tensorflow/compiler/tf2xla/dump_graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,16 @@ struct NameCounts {
std::unordered_map<string, int> counts;
};

string MakeUniquePath(const string& name) {
string MakeUniquePath(string name) {
static NameCounts& instance = *new NameCounts;

// Remove illegal characters from `name`.
for (int i = 0; i < name.size(); ++i) {
if (name[i] == '/') {
name[i] = '_';
}
}

int count;
{
mutex_lock lock(instance.counts_mutex);
Expand Down
5 changes: 2 additions & 3 deletions tensorflow/compiler/tf2xla/kernels/declaration_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,8 @@ class ArgOp : public XlaOpKernel {
if (arg.is_variable) {
// We use the argument position of the variable input as a unique ID.
// TODO(phawkins): this code assumes that variables do not alias.
// TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
tc.CreateVariable(index_, arg.name, arg.value.type, arg.value.handle)
.IgnoreError();
OP_REQUIRES_OK(ctx, tc.CreateVariable(index_, arg.name, arg.value.type,
arg.value.handle));
ctx->SetVariableOutput(0, index_);
} else if (arg.value.is_constant) {
ctx->SetConstantOutput(0, arg.value.constant_value);
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/compiler/tf2xla/xla_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ int64 XlaCompiler::NextStepId() {
static void PruneUnreachableNodes(Graph* graph) {
std::unordered_set<const Node*> nodes;
for (Node* node : graph->nodes()) {
if (node->type_string() == "_Retval") {
if (node->type_string() == "_Retval" ||
StringPiece(node->type_string()).ends_with("Send")) {
nodes.insert(node);
}
}
Expand Down Expand Up @@ -379,7 +380,6 @@ Status XlaCompiler::CompileGraph(string const& name,
VLOG(1) << "Executing graph symbolically to populate ComputationBuilder.";

xla::ComputationBuilder builder(client(), name);

XlaContext* context =
new XlaContext(this, &builder, options_.allow_cpu_custom_calls,
options_.resolve_compile_time_constants);
Expand Down
1 change: 1 addition & 0 deletions tensorflow/compiler/xla/service/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ cc_test(
"//tensorflow/compiler/xla:shape_util",
"//tensorflow/compiler/xla:test_helpers",
"//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla/tests:hlo_test_base",
"//tensorflow/core:test_main",
],
)
Expand Down
3 changes: 2 additions & 1 deletion tensorflow/compiler/xla/service/buffer_assignment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,8 @@ Status GatherComputationsByAllocationType(
}

for (auto& instruction : computation->instructions()) {
for (auto* subcomputation : instruction->MakeCalledComputationsSet()) {
for (HloComputation* subcomputation :
instruction->called_computations()) {
switch (instruction->opcode()) {
case HloOpcode::kCall:
case HloOpcode::kWhile:
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/compiler/xla/service/buffer_liveness.cc
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ bool BufferLiveness::live_range_strictly_before(const LogicalBuffer& a,
// *) Is a loop fusion instruction (with DynamicUpdateSlice fused root) where
// the singleton use of 'a' at 'a.index' is the fused root at operand 0.
for (const BufferAlias& alias : points_to_analysis_->GetBufferAliases(a)) {
if (alias.instruction()->users().count(b.instruction()) > 0 &&
if (b.instruction()->IsUserOf(alias.instruction()) &&
!CanShareOperandBufferWithUser(alias.instruction(), alias.index(),
b.instruction(), b.index(),
points_to_analysis())) {
Expand Down
19 changes: 8 additions & 11 deletions tensorflow/compiler/xla/service/copy_insertion_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,14 @@ class CopyInsertionTest : public HloTestBase {
};
};

#define EXPECT_INST(A, E...) EXPECT_EQ(A, (std::set<HloInstruction*>{E}))

TEST_F(CopyInsertionTest, SingleParameter) {
auto builder = HloComputation::Builder(TestName());
HloInstruction* x = builder.AddInstruction(
HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(F32, {}), "x"));
HloInstruction* tuple =
builder.AddInstruction(HloInstruction::CreateTuple({x}));

EXPECT_INST(x->users(), tuple);
ExpectEqUnordered(x->users(), {tuple});

HloModule module(TestName());
module.AddEntryComputation(builder.Build());
Expand Down Expand Up @@ -127,7 +125,7 @@ TEST_F(CopyInsertionTest, SingleConstant) {
HloInstruction* tuple =
builder.AddInstruction(HloInstruction::CreateTuple({constant}));

EXPECT_INST(constant->users(), tuple);
ExpectEqUnordered(constant->users(), {tuple});

HloModule module(TestName());
module.AddEntryComputation(builder.Build());
Expand Down Expand Up @@ -221,9 +219,9 @@ TEST_F(CopyInsertionTest, AmbiguousPointsToSet) {
builder.AddInstruction(HloInstruction::CreateTernary(
tuple1->shape(), HloOpcode::kSelect, pred, tuple1, tuple2));

EXPECT_INST(constant1->users(), tuple1);
EXPECT_INST(constant2->users(), tuple1, tuple2);
EXPECT_INST(constant3->users(), tuple2);
ExpectEqUnordered(constant1->users(), {tuple1});
ExpectEqUnordered(constant2->users(), {tuple1, tuple2});
ExpectEqUnordered(constant3->users(), {tuple2});

HloModule module(TestName());
module.AddEntryComputation(builder.Build());
Expand Down Expand Up @@ -261,7 +259,7 @@ TEST_F(CopyInsertionTest, BitcastParameter) {
HloModule module(TestName());
module.AddEntryComputation(builder.Build());

EXPECT_INST(x->users(), bitcast);
ExpectEqUnordered(x->users(), {bitcast});

HloInstruction* old_root = module.entry_computation()->root_instruction();
InsertCopies(&module);
Expand Down Expand Up @@ -289,7 +287,7 @@ TEST_F(CopyInsertionTest, BitcastConstant) {
HloModule module(TestName());
module.AddEntryComputation(builder.Build());

EXPECT_INST(constant->users(), bitcast);
ExpectEqUnordered(constant->users(), {bitcast});

HloInstruction* old_root = module.entry_computation()->root_instruction();
InsertCopies(&module);
Expand All @@ -316,8 +314,7 @@ TEST_F(CopyInsertionTest, BitcastTupleElementParameter) {
HloModule module(TestName());
module.AddEntryComputation(builder.Build());

EXPECT_EQ(1, x->user_count());
EXPECT_EQ(*x->users().begin(), bitcast);
ExpectEqUnordered(x->users(), {bitcast});

HloInstruction* old_root = module.entry_computation()->root_instruction();
InsertCopies(&module);
Expand Down
3 changes: 2 additions & 1 deletion tensorflow/compiler/xla/service/gpu/fusion_merger.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"

#include <algorithm>
#include <vector>

#include "tensorflow/compiler/xla/service/hlo_cost_analysis.h"
#include "tensorflow/compiler/xla/service/instruction_fusion.h"
Expand Down Expand Up @@ -249,7 +250,7 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) {
return Status::OK();
}
// Merge fused instructions from 'fusion' into each user.
std::set<HloInstruction*> users = fusion->users();
std::vector<HloInstruction*> users = fusion->users();
for (HloInstruction* user : users) {
user->MergeFusionInstruction(fusion);
changed_ = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ limitations under the License.
#include "external/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h"
#include "external/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h"

#include "external/llvm/include/llvm/Transforms/IPO/Internalize.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/io/path.h"
Expand Down Expand Up @@ -319,9 +320,13 @@ tensorflow::Status LinkLibdeviceIfNecessary(
VLOG(1) << "Linking with libdevice from: " << libdevice_path;
std::unique_ptr<llvm::Module> libdevice_module =
LoadIRModule(libdevice_path, &module->getContext());
if (linker.linkInModule(std::move(libdevice_module),
llvm::Linker::Flags::InternalizeLinkedSymbols |
llvm::Linker::Flags::LinkOnlyNeeded)) {
if (linker.linkInModule(
std::move(libdevice_module), llvm::Linker::Flags::LinkOnlyNeeded,
[](Module& M, const StringSet<>& GVS) {
internalizeModule(M, [&M, &GVS](const GlobalValue& GV) {
return !GV.hasName() || (GVS.count(GV.getName()) == 0);
});
})) {
return tensorflow::errors::Internal(tensorflow::strings::StrCat(
"Error linking libdevice from ", libdevice_path));
}
Expand Down
3 changes: 2 additions & 1 deletion tensorflow/compiler/xla/service/heap_simulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ StatusOr<HeapSimulator::Result> HeapSimulator::Run(
}
for (const BufferAlias& alias :
points_to_analysis.GetBufferAliases(*buffer)) {
const std::set<HloInstruction*>& users = alias.instruction()->users();
const std::vector<HloInstruction*>& users =
alias.instruction()->users();
if (!users.empty()) {
live_buffers[buffer].insert(users.begin(), users.end());
}
Expand Down
18 changes: 8 additions & 10 deletions tensorflow/compiler/xla/service/hlo_computation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,12 @@ Status HloComputation::RemoveInstruction(HloInstruction* instruction) {
TF_RET_CHECK(instruction->user_count() == 0)
<< "instruction " << instruction->name()
<< " has users and cannot be removed";
TF_RET_CHECK(instruction->control_predecessors().empty())
<< "instruction " << instruction->name()
<< " has control predecessors and cannot be removed";
TF_RET_CHECK(instruction->control_successors().empty())
<< "instruction " << instruction->name()
<< " has control successors and cannot be removed";

TF_RET_CHECK(instruction_iterators_.count(instruction) != 0);
auto inst_it = instruction_iterators_.at(instruction);
Expand Down Expand Up @@ -227,7 +233,8 @@ void ComputeComputationPostOrder(
}

for (auto& instruction : computation->instructions()) {
for (auto& called_computation : instruction->MakeCalledComputationsSet()) {
for (HloComputation* called_computation :
instruction->called_computations()) {
ComputeComputationPostOrder(called_computation, visited, post_order);
}
}
Expand Down Expand Up @@ -383,15 +390,6 @@ StatusOr<HloInstruction*> HloComputation::DeepCopyInstruction(
}
}

// Adds a control dependency so that `predecessor` is visited before
// `successor` during DFS traversal of this computation — an ordering
// constraint not captured by normal data dependencies (e.g. ordering among
// Send/Recv operations to avoid deadlock). Returns an error status if either
// instruction does not belong to this computation.
Status HloComputation::AddControlDependency(HloInstruction* predecessor,
                                            HloInstruction* successor) {
  // Both endpoints must be instructions owned by this computation.
  TF_RET_CHECK(instruction_iterators_.count(predecessor) > 0);
  TF_RET_CHECK(instruction_iterators_.count(successor) > 0);
  // Record the edge on both endpoints so it can be walked in either direction.
  successor->AddControlPredecessor(predecessor);
  predecessor->AddControlSuccessor(successor);
  return Status::OK();
}

ProgramShape HloComputation::ComputeProgramShape() const {
ProgramShape program_shape;

Expand Down
11 changes: 0 additions & 11 deletions tensorflow/compiler/xla/service/hlo_computation.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,17 +128,6 @@ class HloComputation {
return instructions_;
}

// Add a control dependency between the two instructions in this computation
// so that the 'predecessor' is visited before the 'successor' during the DFS
// traversal of the computation. Returns an error status if either of the
// given instructions does not belong to the current computation.
//
// This is used to enforce an additional ordering requirement that is not
// captured by normal data dependencies, such as ordering among Send or Recv
// operations to avoid deadlock.
Status AddControlDependency(HloInstruction* predecessor,
HloInstruction* successor);

// Compute and return a post-order of the instructions in the computation. In
// this order, definitions of values always appear before their uses.
std::list<HloInstruction*> MakeInstructionPostOrder() const;
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/compiler/xla/service/hlo_computation_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ TEST_F(HloComputationTest, CycleDetection) {
auto computation = builder.Build();

// Add a control dependency to create a cycle.
ASSERT_IS_OK(computation->AddControlDependency(add, negate));
ASSERT_IS_OK(add->AddControlDependencyTo(negate));

const auto visitor = [](HloInstruction* instruction) { return Status::OK(); };
auto visit_status = computation->Accept(visitor);
Expand Down
5 changes: 2 additions & 3 deletions tensorflow/compiler/xla/service/hlo_graph_dumper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,6 @@ string InstructionSequenceGraph(
instruction->metadata().source_line());
}

std::vector<HloComputation*> called_computations;

// Pick different colors or shapes for instructions which are particularly
// expensive (eg, dot) and those which are unusual in some way or unique
// (eg, parameter).
Expand Down Expand Up @@ -401,7 +399,8 @@ string InstructionSequenceGraph(
} else {
// Add a dotted edge between the instruction and any computations that the
// instruction calls.
for (auto* computation : instruction->MakeCalledComputationsSet()) {
for (const HloComputation* computation :
instruction->called_computations()) {
string cluster_name = StrCat("cluster_", ComputationId(computation));
string call_edge = Printf(
"%s -> %s [ style=dashed; ltail=%s ];\n",
Expand Down
Loading

0 comments on commit be6c40a

Please sign in to comment.