Skip to content

Commit

Permalink
schema, lambda and template bugfixes related to appsync endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
marekq committed Dec 29, 2020
1 parent cd7b95c commit afe3358
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 15 deletions.
1 change: 1 addition & 0 deletions graphql/getlist-request.vtl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## AppSync request mapping template for the list query.
## Performs a DynamoDB Scan over the "timest" secondary index and forwards
## the client-supplied pagination token when one is present.
{
"version": "2017-02-28",
"operation": "Scan",
"index": "timest",
## Pass nextToken through for pagination; JSON null on the first page.
"nextToken": #if( $context.args.nextToken ) "$context.args.nextToken" #else null #end
}
5 changes: 3 additions & 2 deletions graphql/getquery-request.vtl
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
"operation" : "Query",
"index" : "visible",
"query": {
"expression": "visible = :visible",
"expression": "visible = :visible and timest > :timest",
"expressionValues": {
":visible" : { "S" : "y" }
":visible" : { "S" : "y" },
":timest" : { "N": $context.arguments.timest }
}
},
"nextToken": #if( $context.args.nextToken ) "$context.args.nextToken" #else null #end
Expand Down
3 changes: 1 addition & 2 deletions graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ type Query {
getddbsource(guid: String!, timest: String!): ddbsource
listddbsource(filter: TableddbsourceFilterInput, limit: Int, nextToken: String): ddbsourceConnection
queryddbsourceByTimest(after: String, blogsource: String!, first: Int): ddbsourceConnection
queryddbsourceByVisible(filter: TableddbsourceFilterInput, after: String, first: Int, visible: String!): ddbsourceConnection
queryddbsourceByVisible(filter: TableddbsourceFilterInput, after: String, first: Int, visible: String!, timest: Int!): ddbsourceConnection
}

type ddbsource {
Expand All @@ -27,7 +27,6 @@ type ddbsourceConnection {
nextToken: String
}


input TableddbsourceFilterInput {
blogsource: TableStringFilterInput
guid: TableStringFilterInput
Expand Down
4 changes: 2 additions & 2 deletions lambda-crawl/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_guids(ts):
guids = []

# get the guid values up to x days ago
queryres = ddb.query(ScanIndexForward = True, IndexName = 'visible', ProjectionExpression = 'guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(str(ts)))
queryres = ddb.query(ScanIndexForward = True, IndexName = 'visible', ProjectionExpression = 'guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(ts))

for x in queryres['Items']:
if 'guid' in x:
Expand All @@ -42,7 +42,7 @@ def get_guids(ts):

# paginate the query in case more than 100 results are returned
while 'LastEvaluatedKey' in queryres:
queryres = ddb.query(ExclusiveStartKey = queryres['LastEvaluatedKey'], ScanIndexForward = True, IndexName = 'visible', ProjectionExpression = 'guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(str(ts)))
queryres = ddb.query(ExclusiveStartKey = queryres['LastEvaluatedKey'], ScanIndexForward = True, IndexName = 'visible', ProjectionExpression = 'guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(ts))

for x in queryres['Items']:
if 'guid' in x:
Expand Down
12 changes: 6 additions & 6 deletions lambda-getfeed/getfeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def put_dynamo(timest_post, title, cleantxt, rawhtml, description, link, blogsou
ddb.put_item(
TableName = os.environ['dynamo_table'],
Item = {
'timest' : timest_post, # store the unix timestamp of the post
'timest' : timest_post, # store the unix timestamp of the post as an int
'datestr' : datestr_post, # store the human friendly timestamp of the post
'title' : title,
'description' : description, # store the short rss feed description of the content
Expand Down Expand Up @@ -204,7 +204,7 @@ def get_feed(url, blogsource, guids):
blogupdate = True

# put record to dynamodb
put_dynamo(str(timest_post), title, cleantxt, rawhtml, description, link, blogsource, author, guid, tags, category, datestr_post)
put_dynamo(timest_post, title, cleantxt, rawhtml, description, link, blogsource, author, guid, tags, category, datestr_post)

# add blog to newblogs list
newblogs.append(str(blogsource) + ' ' + str(title) + ' ' + str(guid))
Expand Down Expand Up @@ -310,12 +310,12 @@ def get_table_json(blogsource):
if blogsource != 'all':

# query the dynamodb table for blogposts of a specific category from up to 1 day ago
blogs = ddb.query(IndexName = "timest", ScanIndexForward = True, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('blogsource').eq(blogsource) & Key('timest').gt(str(diff_ts)))
blogs = ddb.query(IndexName = "timest", ScanIndexForward = True, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('blogsource').eq(blogsource) & Key('timest').gt(diff_ts))

else:

# query the dynamodb table for all category blogposts from up to 1 day ago
blogs = ddb.query(IndexName = "visible", ScanIndexForward = True, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(str(diff_ts)))
blogs = ddb.query(IndexName = "visible", ScanIndexForward = True, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(diff_ts))

# iterate over the returned items
for a in blogs['Items']:
Expand All @@ -335,12 +335,12 @@ def get_table_json(blogsource):
if blogsource != 'all':

# query the dynamodb table for blogposts of a specific category
blogs = ddb.query(IndexName = "timest", ScanIndexForward = True, ExclusiveStartKey = lastkey, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('blogsource').eq(source) & Key('timest').gt(str(diff_ts)))
blogs = ddb.query(IndexName = "timest", ScanIndexForward = True, ExclusiveStartKey = lastkey, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('blogsource').eq(source) & Key('timest').gt(diff_ts))

else:

# query the dynamodb table for all category blogposts from up to 30 days old
blogs = ddb.query(IndexName = "visible", ScanIndexForward = True, ExclusiveStartKey = lastkey, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(str(diff_ts)))
blogs = ddb.query(IndexName = "visible", ScanIndexForward = True, ExclusiveStartKey = lastkey, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(diff_ts))

# add an entry per blog to the output list
for a in blogs['Items']:
Expand Down
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ rss-lambda

Monitor your favourite blogs through RSS and get a notification whenever a new blog is posted. New blogposts are stored in DynamoDB and (optionally) sent out to your e-mail address using SES. The Lambda function to retrieve the blogs runs every 10 minutes by default. The cost for running the solution should be less than $3 per month, which is mostly influenced by the polling frequency of the function.

You can extend the blog scraper by adding your own RSS feeds to monitor. By default various AWS related feeds are included, but you can add any of your own feeds in the *lambda-dynamo/feeds.txt* file. Within the DynamoDB table that is deployed, you can find various details about the blogposts and also the text or html versions of the content. This can be helpful in case you are building your own feed scraper or notification service.
You can extend the blog scraper by adding your own RSS feeds to monitor. By default various AWS related feeds are included, but you can add any of your own feeds in the *lambda-dynamo/feeds.txt* file. Within the DynamoDB table that is deployed, you can find various details about the blogposts and also the text or html versions of the content. This can be helpful in case you are building your own feed scraper or notification service. You can also use the included AppSync endpoint to read data from the table using GraphQL.

Optionally, a JSON output for every blog category can be uploaded as a public S3 object. These files can be included in a single-page app, such as the one at https://marek.rocks. In the future, the output will be compressed using Brotli or a similar algorithm to save on S3 storage and bandwidth costs.

Expand Down Expand Up @@ -40,6 +40,7 @@ Roadmap
-------

- [ ] Switch to Step Functions Express to save on costs. The Express option can be used today, but is more difficult to debug in case of Lambda failures.
- [X] Add AppSync endpoint for retrieval of blog posts through Amplify.
- [X] Decompose the "monolith" Lambda function into smaller functions. This will allow for easier retries and debugging of blogpost retrieval.
- [X] Implement Step Function for better coordination of individual functionality.
- [X] Add Lambda Extension to monitor network and CPU usage of the RSS function.
Expand Down
26 changes: 24 additions & 2 deletions template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ Parameters:
- 'y'
- 'n'

CreateAppSync:
Description: Create a read-only AppSync endpoint for the blogs stored in DynamoDB
Default: 'n'
Type: String
AllowedValues:
- 'y'
- 'n'

Conditions:
EnableAppSync: !Equals [ !Ref CreateAppSync, y ]

Resources:

rssgetfeed:
Expand Down Expand Up @@ -115,11 +126,12 @@ Resources:
rssfeed:
Type: 'AWS::DynamoDB::Table'
Properties:
BillingMode: PAY_PER_REQUEST
AttributeDefinitions:
- AttributeName: guid
AttributeType: S
- AttributeName: timest
AttributeType: S
AttributeType: N
- AttributeName: visible
AttributeType: S
- AttributeName: blogsource
Expand All @@ -129,7 +141,6 @@ Resources:
KeyType: HASH
- AttributeName: timest
KeyType: RANGE
BillingMode: PAY_PER_REQUEST
GlobalSecondaryIndexes:
- IndexName: visible
KeySchema:
Expand Down Expand Up @@ -178,9 +189,15 @@ Resources:
Destinations:
- CloudWatchLogsLogGroup:
LogGroupArn: !GetAtt rssblog.Arn
Events:
ScheduledEventEvery15Min:
Type: Schedule
Properties:
Schedule: rate(15 minutes)

# graphql api role
GraphQLApiRole:
Condition: EnableAppSync
Type: 'AWS::IAM::Role'
Properties:
AssumeRolePolicyDocument:
Expand Down Expand Up @@ -216,6 +233,7 @@ Resources:

# create graphql api
GraphQLApi:
Condition: EnableAppSync
Type: 'AWS::AppSync::GraphQLApi'
Properties:
XrayEnabled: true
Expand All @@ -227,13 +245,15 @@ Resources:

# define graphql schema
GraphQLSchema:
Condition: EnableAppSync
Type: 'AWS::AppSync::GraphQLSchema'
Properties:
DefinitionS3Location: './graphql/schema.graphql'
ApiId: !GetAtt 'GraphQLApi.ApiId'

# define dynamodb source
DDBDataSource:
Condition: EnableAppSync
Type: 'AWS::AppSync::DataSource'
Properties:
Type: AMAZON_DYNAMODB
Expand All @@ -246,6 +266,7 @@ Resources:

# create appsync listddbsource resolver
ListDDBResolver:
Condition: EnableAppSync
Type: 'AWS::AppSync::Resolver'
Properties:
TypeName: Query
Expand All @@ -257,6 +278,7 @@ Resources:

# create appsync resolver
QueryDDBResolver:
Condition: EnableAppSync
Type: 'AWS::AppSync::Resolver'
Properties:
TypeName: Query
Expand Down

0 comments on commit afe3358

Please sign in to comment.