Skip to content

Commit

Permalink
schema, lambda and template bugfixes related to appsync endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
marekq committed Dec 29, 2020
1 parent cd7b95c commit afe3358
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 15 deletions.
1 change: 1 addition & 0 deletions graphql/getlist-request.vtl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## AppSync request mapping template for the list query.
## Performs a DynamoDB Scan over the "timest" secondary index and forwards
## the client-supplied pagination token when one is present.
{
"version": "2017-02-28",
"operation": "Scan",
"index": "timest",
## Pass nextToken through for pagination; JSON null on the first page.
"nextToken": #if( $context.args.nextToken ) "$context.args.nextToken" #else null #end
}
5 changes: 3 additions & 2 deletions graphql/getquery-request.vtl
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
"operation" : "Query",
"index" : "visible",
"query": {
"expression": "visible = :visible",
"expression": "visible = :visible and timest > :timest",
"expressionValues": {
":visible" : { "S" : "y" }
":visible" : { "S" : "y" },
":timest" : { "N": $context.arguments.timest }
}
},
"nextToken": #if( $context.args.nextToken ) "$context.args.nextToken" #else null #end
Expand Down
3 changes: 1 addition & 2 deletions graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ type Query {
getddbsource(guid: String!, timest: String!): ddbsource
listddbsource(filter: TableddbsourceFilterInput, limit: Int, nextToken: String): ddbsourceConnection
queryddbsourceByTimest(after: String, blogsource: String!, first: Int): ddbsourceConnection
queryddbsourceByVisible(filter: TableddbsourceFilterInput, after: String, first: Int, visible: String!): ddbsourceConnection
queryddbsourceByVisible(filter: TableddbsourceFilterInput, after: String, first: Int, visible: String!, timest: Int!): ddbsourceConnection
}

type ddbsource {
Expand All @@ -27,7 +27,6 @@ type ddbsourceConnection {
nextToken: String
}


input TableddbsourceFilterInput {
blogsource: TableStringFilterInput
guid: TableStringFilterInput
Expand Down
4 changes: 2 additions & 2 deletions lambda-crawl/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_guids(ts):
guids = []

# get the guid values up to x days ago
queryres = ddb.query(ScanIndexForward = True, IndexName = 'visible', ProjectionExpression = 'guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(str(ts)))
queryres = ddb.query(ScanIndexForward = True, IndexName = 'visible', ProjectionExpression = 'guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(ts))

for x in queryres['Items']:
if 'guid' in x:
Expand All @@ -42,7 +42,7 @@ def get_guids(ts):

# paginate the query in case more than 100 results are returned
while 'LastEvaluatedKey' in queryres:
queryres = ddb.query(ExclusiveStartKey = queryres['LastEvaluatedKey'], ScanIndexForward = True, IndexName = 'visible', ProjectionExpression = 'guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(str(ts)))
queryres = ddb.query(ExclusiveStartKey = queryres['LastEvaluatedKey'], ScanIndexForward = True, IndexName = 'visible', ProjectionExpression = 'guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(ts))

for x in queryres['Items']:
if 'guid' in x:
Expand Down
12 changes: 6 additions & 6 deletions lambda-getfeed/getfeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def put_dynamo(timest_post, title, cleantxt, rawhtml, description, link, blogsou
ddb.put_item(
TableName = os.environ['dynamo_table'],
Item = {
'timest' : timest_post, # store the unix timestamp of the post
'timest' : timest_post, # store the unix timestamp of the post as an int
'datestr' : datestr_post, # store the human friendly timestamp of the post
'title' : title,
'description' : description, # store the short rss feed description of the content
Expand Down Expand Up @@ -204,7 +204,7 @@ def get_feed(url, blogsource, guids):
blogupdate = True

# put record to dynamodb
put_dynamo(str(timest_post), title, cleantxt, rawhtml, description, link, blogsource, author, guid, tags, category, datestr_post)
put_dynamo(timest_post, title, cleantxt, rawhtml, description, link, blogsource, author, guid, tags, category, datestr_post)

# add blog to newblogs list
newblogs.append(str(blogsource) + ' ' + str(title) + ' ' + str(guid))
Expand Down Expand Up @@ -310,12 +310,12 @@ def get_table_json(blogsource):
if blogsource != 'all':

# query the dynamodb table for blogposts of a specific category from up to 1 day ago
blogs = ddb.query(IndexName = "timest", ScanIndexForward = True, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('blogsource').eq(blogsource) & Key('timest').gt(str(diff_ts)))
blogs = ddb.query(IndexName = "timest", ScanIndexForward = True, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('blogsource').eq(blogsource) & Key('timest').gt(diff_ts))

else:

# query the dynamodb table for all category blogposts from up to 1 day ago
blogs = ddb.query(IndexName = "visible", ScanIndexForward = True, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(str(diff_ts)))
blogs = ddb.query(IndexName = "visible", ScanIndexForward = True, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(diff_ts))

# iterate over the returned items
for a in blogs['Items']:
Expand All @@ -335,12 +335,12 @@ def get_table_json(blogsource):
if blogsource != 'all':

# query the dynamodb table for blogposts of a specific category
blogs = ddb.query(IndexName = "timest", ScanIndexForward = True, ExclusiveStartKey = lastkey, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('blogsource').eq(source) & Key('timest').gt(str(diff_ts)))
blogs = ddb.query(IndexName = "timest", ScanIndexForward = True, ExclusiveStartKey = lastkey, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('blogsource').eq(source) & Key('timest').gt(diff_ts))

else:

# query the dynamodb table for all category blogposts from up to 30 days old
blogs = ddb.query(IndexName = "visible", ScanIndexForward = True, ExclusiveStartKey = lastkey, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(str(diff_ts)))
blogs = ddb.query(IndexName = "visible", ScanIndexForward = True, ExclusiveStartKey = lastkey, ProjectionExpression = 'blogsource, datestr, timest, title, author, description, link, guid', KeyConditionExpression = Key('visible').eq('y') & Key('timest').gt(diff_ts))

# add an entry per blog to the output list
for a in blogs['Items']:
Expand Down
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ rss-lambda

Monitor your favourite blogs through RSS and get a notification whenever a new blog is posted. New blogposts are stored in DynamoDB and (optionally) sent out to your e-mail address using SES. The Lambda function to retrieve the blogs runs every 10 minutes by default. The cost for running the solution should be less than $3 per month, which is mostly influenced by the polling frequency of the function.

You can extend the blog scraper by adding your own RSS feeds to monitor. By default various AWS related feeds are included, but you can add any of your own feeds in the *lambda-dynamo/feeds.txt* file. Within the DynamoDB table that is deployed, you can find various details about the blogposts and also the text or html versions of the content. This can be helpful in case you are building your own feed scraper or notification service.
You can extend the blog scraper by adding your own RSS feeds to monitor. By default various AWS related feeds are included, but you can add any of your own feeds in the *lambda-dynamo/feeds.txt* file. Within the DynamoDB table that is deployed, you can find various details about the blogposts and also the text or html versions of the content. This can be helpful in case you are building your own feed scraper or notification service. You can also use the included AppSync endpoint to read data from the table using GraphQL.

Optionally, a JSON output for every blog category can be uploaded as a public S3 object. These files can be included in a single-page app, such as the one at https://marek.rocks. In the future, the output will be compressed using Brotli or a similar algorithm to save on S3 storage and bandwidth costs.

Expand Down Expand Up @@ -40,6 +40,7 @@ Roadmap
-------

- [ ] Switch to Step Functions Express to save on costs. The Express option can be used today, but is more difficult to debug in case of Lambda failures.
- [X] Add AppSync endpoint for retrieval of blog posts through Amplify.
- [X] Decompose the "monolith" Lambda function into smaller functions. This will allow for easier retries and debugging of blogpost retrieval.
- [X] Implement Step Function for better coordination of individual functionality.
- [X] Add Lambda Extension to monitor network and CPU usage of the RSS function.
Expand Down
26 changes: 24 additions & 2 deletions template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ Parameters:
- 'y'
- 'n'

CreateAppSync:
Description: Create a read-only AppSync endpoint for the blogs stored in DynamoDB
Default: 'n'
Type: String
AllowedValues:
- 'y'
- 'n'

Conditions:
EnableAppSync: !Equals [ !Ref CreateAppSync, y ]

Resources:

rssgetfeed:
Expand Down Expand Up @@ -115,11 +126,12 @@ Resources:
rssfeed:
Type: 'AWS::DynamoDB::Table'
Properties:
BillingMode: PAY_PER_REQUEST
AttributeDefinitions:
- AttributeName: guid
AttributeType: S
- AttributeName: timest
AttributeType: S
AttributeType: N
- AttributeName: visible
AttributeType: S
- AttributeName: blogsource
Expand All @@ -129,7 +141,6 @@ Resources:
KeyType: HASH
- AttributeName: timest
KeyType: RANGE
BillingMode: PAY_PER_REQUEST
GlobalSecondaryIndexes:
- IndexName: visible
KeySchema:
Expand Down Expand Up @@ -178,9 +189,15 @@ Resources:
Destinations:
- CloudWatchLogsLogGroup:
LogGroupArn: !GetAtt rssblog.Arn
Events:
ScheduledEventEvery15Min:
Type: Schedule
Properties:
Schedule: rate(15 minutes)

# graphql api role
GraphQLApiRole:
Condition: EnableAppSync
Type: 'AWS::IAM::Role'
Properties:
AssumeRolePolicyDocument:
Expand Down Expand Up @@ -216,6 +233,7 @@ Resources:

# create graphql api
GraphQLApi:
Condition: EnableAppSync
Type: 'AWS::AppSync::GraphQLApi'
Properties:
XrayEnabled: true
Expand All @@ -227,13 +245,15 @@ Resources:

# define graphql schema
GraphQLSchema:
Condition: EnableAppSync
Type: 'AWS::AppSync::GraphQLSchema'
Properties:
DefinitionS3Location: './graphql/schema.graphql'
ApiId: !GetAtt 'GraphQLApi.ApiId'

# define dynamodb source
DDBDataSource:
Condition: EnableAppSync
Type: 'AWS::AppSync::DataSource'
Properties:
Type: AMAZON_DYNAMODB
Expand All @@ -246,6 +266,7 @@ Resources:

# create appsync listddbsource resolver
ListDDBResolver:
Condition: EnableAppSync
Type: 'AWS::AppSync::Resolver'
Properties:
TypeName: Query
Expand All @@ -257,6 +278,7 @@ Resources:

# create appsync resolver
QueryDDBResolver:
Condition: EnableAppSync
Type: 'AWS::AppSync::Resolver'
Properties:
TypeName: Query
Expand Down

0 comments on commit afe3358

Please sign in to comment.