Skip to content

Commit

Permalink
Add an EFS durable file system for hashing lambda (facebook#933)
Browse files Browse the repository at this point in the history
* Add an EFS durable file system for hashing lambda

Summary
---
This file system will be used to write content hashes by the hashing lambda so that we can build indexes to use for retroaction.

Test Plan
---
```
$ terraform init
$ terraform apply
```

Added the following snippet to the hashing lambda and deployed.

```python
    with open(f"/mnt/durable-storage/{context.aws_request_id}.file", "w") as f:
        f.write("Hello, World!")

    from os import listdir

    logger.info(listdir("/mnt/durable-storage/"))
```

Hit test a few times, deployed a new version of the lambda, and then checked the logs.

Saw the following

```
[INFO]	2022-02-24T03:21:05.008Z	9e057c0d-2027-468a-b797-f261d3964178	['944d7e76-0fb5-4a7b-9104-de86fd90dfc0.file', 'a36b2c1b-1ef4-47d0-aa5c-7f69498c12fd.file', '57e95dc9-efaa-4707-abac-ab1017c7c7ed.file', 'abff2ab6-fd19-47dd-85d8-3bcf2f63c5b7.file', '51191f91-75fc-4f6d-a770-44c46725c054.file', '9e057c0d-2027-468a-b797-f261d3964178.file']
```

See how files accumulate in the file system.

* Upgrade terraform version
  • Loading branch information
dmukhg authored Mar 1, 2022
1 parent e25bfc1 commit a6f7114
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 17 deletions.
30 changes: 14 additions & 16 deletions hasher-matcher-actioner/terraform/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 59 additions & 0 deletions hasher-matcher-actioner/terraform/durable-fs/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

# Looks up the region the AWS provider is configured for; its name is used
# further down to build the availability-zone names handed to the VPC module.
data "aws_region" "current" {}

/*
 * # Durable file system
 * The hashing lambda uses an elastic file system (EFS) to write hashes at
 * high throughput. The files so generated are used by other lambdas to
 * create clusters from recently seen content.
 *
 * EFS can only be mounted onto lambdas that are connected to a VPC, so this
 * module also creates a dedicated VPC.
 */
resource "aws_efs_file_system" "lcc_durable_fs" {
  # Creation token is derived from the deployment prefix.
  creation_token = "${var.prefix}-lcc-durable-filesystem"

  # Caller-supplied tags plus a fixed Name tag; in merge() the later map wins
  # when both define the same key.
  tags = merge(var.additional_tags, { Name = "LCC_DurableFS" })
}

# Dedicated VPC that hosts the EFS mount target.
# NOTE(review): the module source carries no `version` constraint, so
# `terraform init` may resolve a newer release later — consider pinning one.
module "lcc_efs_vpc" {
  source = "terraform-aws-modules/vpc/aws"

  name = "${var.prefix}-lcc-efs-vpc"
  cidr = "10.10.0.0/16"

  # Yields "<region>a", "<region>b", "<region>c" in that order.
  # NOTE(review): assumes every target region/account exposes zones a, b and c
  # — confirm, or switch to the aws_availability_zones data source.
  azs = [
    for zone_suffix in ["a", "b", "c"] :
    "${data.aws_region.current.name}${zone_suffix}"
  ]

  # Only one "intra" subnet is created; the mount target below attaches to it.
  # NOTE(review): intra subnets are presumably not routed to the Internet —
  # verify the hashing lambda can still reach the AWS services it calls.
  intra_subnets = ["10.10.101.0/24"]
}

# Exposes the durable file system inside the VPC's first (and only) intra
# subnet, guarded by the VPC's default security group.
resource "aws_efs_mount_target" "lcc_durable_fs" {
  security_groups = [module.lcc_efs_vpc.default_security_group_id]
  subnet_id       = module.lcc_efs_vpc.intra_subnets[0]
  file_system_id  = aws_efs_file_system.lcc_durable_fs.id
}

# Access point through which the lambda sees the file system. Its ARN is what
# this module exports as `durable_fs_arn`.
resource "aws_efs_access_point" "access_point_for_lambda" {
  file_system_id = aws_efs_file_system.lcc_durable_fs.id

  # POSIX identity applied to file operations made through this access point.
  posix_user {
    uid = 1000
    gid = 1000
  }

  # Clients are rooted at /lambda; creation_info describes the owner and mode
  # to use for that directory.
  # NOTE(review): "777" is world-writable even though owner and posix_user are
  # both 1000 — confirm whether a tighter mode would do.
  root_directory {
    path = "/lambda"

    creation_info {
      owner_uid   = 1000
      owner_gid   = 1000
      permissions = "777"
    }
  }
}
12 changes: 12 additions & 0 deletions hasher-matcher-actioner/terraform/durable-fs/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

# Security groups a consumer must join to reach the mount target
# (the dedicated VPC's default security group).
output "durable_fs_security_group_ids" {
  value = [module.lcc_efs_vpc.default_security_group_id]
}

# Subnets of the dedicated VPC in which a consumer can be placed.
output "durable_fs_subnet_ids" {
  value = module.lcc_efs_vpc.intra_subnets
}

# ARN of the EFS access point that consumers (the hashing lambda) mount.
output "durable_fs_arn" {
  description = "ARN of the EFS access point to mount on lambdas."
  value       = aws_efs_access_point.access_point_for_lambda.arn

  # The access point does not depend on the mount target, yet a Lambda with a
  # file_system_config can only be created once the mount target is available.
  # Declaring the dependency here makes every consumer of this output wait for
  # the mount target, instead of racing it.
  depends_on = [aws_efs_mount_target.lcc_durable_fs]
}
11 changes: 11 additions & 0 deletions hasher-matcher-actioner/terraform/durable-fs/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

# Used as the leading part of the EFS creation token and the VPC name.
variable "prefix" {
  type        = string
  description = "Prefix to use for resource names"
}

# Merged into the tags of the EFS file system alongside its Name tag.
variable "additional_tags" {
  type        = map(string)
  description = "Additional resource tags"
}
15 changes: 15 additions & 0 deletions hasher-matcher-actioner/terraform/hasher/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ resource "aws_lambda_function" "hashing_lambda" {
}
}

vpc_config {
security_group_ids = var.durable_fs_security_group_ids
subnet_ids = var.durable_fs_subnet_ids
}

file_system_config {
local_mount_path = var.durable_fs_local_mount_path
arn = var.durable_fs_arn
}

tags = merge(
var.additional_tags,
{
Expand Down Expand Up @@ -115,6 +125,11 @@ resource "aws_iam_role_policy_attachment" "hashing_lambda_permissions" {
policy_arn = aws_iam_policy.hashing_lambda_policy.arn
}

# Attaches the AWS-managed VPC-access execution policy to the hashing lambda's
# role; added together with the lambda's vpc_config.
resource "aws_iam_role_policy_attachment" "AWSLambdaVPCAccessExecutionRole" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole"
  role       = aws_iam_role.hashing_lambda_role.name
}

resource "aws_lambda_event_source_mapping" "submissions_to_hasher" {
event_source_arn = var.submissions_queue.arn
function_name = aws_lambda_function.hashing_lambda.arn
Expand Down
20 changes: 20 additions & 0 deletions hasher-matcher-actioner/terraform/hasher/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,23 @@ variable "image_data_storage" {
all_bucket_arns = list(string)
})
}

# Fed into the hashing lambda's vpc_config.security_group_ids.
variable "durable_fs_security_group_ids" {
  type        = list(string)
  description = "SG Ids for the durable file-system we are mounting on the hashing lambda."
}

# Fed into the hashing lambda's vpc_config.subnet_ids.
variable "durable_fs_subnet_ids" {
  type        = list(string)
  description = "subnet Ids for the durable file-system we are mounting on the hashing lambda."
}

# Path inside the lambda's execution environment where the durable file system
# is mounted; passed to file_system_config.local_mount_path.
variable "durable_fs_local_mount_path" {
  description = "Local mount path durable file-system we are mounting on the hashing lambda."
  type        = string

  # Lambda only accepts mount paths under /mnt/ (at most 160 characters).
  # Checking here turns a late apply-time API error into a plan-time one.
  validation {
    condition = (
      length(var.durable_fs_local_mount_path) <= 160 &&
      can(regex("^/mnt/[a-zA-Z0-9_.-]+$", var.durable_fs_local_mount_path))
    )
    error_message = "The durable_fs_local_mount_path value must start with \"/mnt/\" followed by letters, digits, dots, underscores or dashes, and be at most 160 characters long."
  }
}

# Passed to the hashing lambda's file_system_config.arn. The root module wires
# this to the durable-fs module's `durable_fs_arn` output, which is the ARN of
# the EFS access point.
variable "durable_fs_arn" {
  type        = string
  description = "ARN for the durable file-system we are mounting on the hashing lambda."
}
14 changes: 13 additions & 1 deletion hasher-matcher-actioner/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ locals {
te_data_folder = module.hashing_data.threat_exchange_data_folder_info.key
te_api_token_secret_name = "threatexchange/${var.prefix}_api_tokens"
hma_api_tokens_secret_name = "hma/${var.prefix}_api_tokens"

durable_storage_path = "/mnt/durable-storage"
}

### Config storage ###
Expand Down Expand Up @@ -218,6 +220,12 @@ module "authentication" {
webapp_and_api_shared_user_pool_client_id = var.webapp_and_api_shared_user_pool_client_id
}

# EFS file system (plus its dedicated VPC) that the hashing lambda mounts.
# NOTE(review): the hasher module receives merge(var.additional_tags,
# local.common_tags) while this module gets var.additional_tags only —
# confirm whether the common tags should apply here as well.
module "durable_fs" {
  source = "./durable-fs"

  additional_tags = var.additional_tags
  prefix          = var.prefix
}


/**
* # Primary S3 Bucket:
Expand Down Expand Up @@ -292,7 +300,6 @@ resource "aws_s3_bucket" "banks_media_bucket" {
}
}


/*
* # Submissions SQS:
* Submissions from the API are routed directly into a queue. Doing an SNS
Expand Down Expand Up @@ -402,6 +409,11 @@ module "hasher" {
)
}

durable_fs_subnet_ids = module.durable_fs.durable_fs_subnet_ids
durable_fs_security_group_ids = module.durable_fs.durable_fs_security_group_ids
durable_fs_arn = module.durable_fs.durable_fs_arn
durable_fs_local_mount_path = local.durable_storage_path

log_retention_in_days = var.log_retention_in_days
additional_tags = merge(var.additional_tags, local.common_tags)
config_table = local.config_table
Expand Down

0 comments on commit a6f7114

Please sign in to comment.