|
| 1 | +import logging |
| 2 | +import requests |
| 3 | +import re |
| 4 | +import urllib.request |
| 5 | +import urllib.parse |
| 6 | +import urllib.error |
| 7 | +from bs4 import BeautifulSoup |
| 8 | +import ssl |
| 9 | +import itertools |
| 10 | +from telegram.ext import Updater, CommandHandler, MessageHandler, Filters |
| 11 | +import decouple |
| 12 | + |
# Enable logging: timestamped records, INFO level and above.
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

# Module-level logger for this file.
logger = logging.getLogger(__name__)

# Bot token loaded through decouple.config under the key "API_KEY";
# it is passed to the Updater in main().
TOKEN = decouple.config("API_KEY")
| 20 | + |
# Define the command handlers. Each one takes two arguments: update and
# context. The error handler reads the raised exception from context.error.
| 23 | + |
| 24 | + |
def start(update, context):
    """Send the bot's introduction when the command /start is issued.

    Args:
        update: Incoming update; the reply is sent through ``update.message``.
        context: Callback context supplied by the dispatcher (unused).
    """
    # Adjacent literals are joined at compile time. Every fragment ends with
    # an explicit space or newline so words cannot run together ("toget").
    update.message.reply_text(
        'What can this bot do?\n\n'
        'This bot gives brief information about any movie from IMDb website\n'
        'Send /name movie_name to know the genre and rating of the movie.\n'
        'Send /genre genre_name to get the list of movies belonging to that genre'
    )
| 32 | + |
| 33 | + |
def help(update, context):
    """Reply with the help text when the command /help is issued."""
    incoming = update.message
    incoming.reply_text('Help!')
| 37 | + |
| 38 | + |
def genre(update, context):
    """Send a list of movies when the command /genre is issued.

    The text following the command is used as the ``genres`` query of IMDb's
    title search. The reply is the page title followed by the text of every
    link on the page that points at a ``/title/`` path, one per line.

    Args:
        update: Incoming update; ``update.message.text`` holds the command and
            the reply is sent through ``update.message.reply_text``.
        context: Callback context supplied by the dispatcher (unused).
    """
    message = update.message

    # Split on whitespace rather than slicing a fixed-length prefix, so a
    # missing argument, extra spaces or a "/genre@botname" form are handled.
    parts = (message.text or '').split(None, 1)
    genre_name = parts[1].strip() if len(parts) > 1 else ''
    if not genre_name:
        message.reply_text('Please send /genre genre_name, for example /genre comedy')
        return

    logger.info('Genre lookup: %s', genre_name)
    try:
        # params= URL-encodes the user's text; the timeout keeps a stalled
        # connection from blocking the handler indefinitely.
        response = requests.get('https://www.imdb.com/search/title/',
                                params={'genres': genre_name}, timeout=10)
    except requests.RequestException:
        logger.exception('IMDb genre request failed for %r', genre_name)
        message.reply_text('Sorry, IMDb could not be reached. Try again later')
        return

    soup = BeautifulSoup(response.text, "html.parser")
    title_tag = soup.find('title')
    heading = title_tag.get_text() if title_tag is not None else ''

    # This exact page title is treated as the marker for an unknown genre.
    if not heading or heading == 'IMDb: Advanced Title Search - IMDb':
        message.reply_text("Sorry,No such genre.Try again")
        return

    lines = [heading + '\n']
    for anchor in soup.find_all('a', href=re.compile(r'^/title/')):
        # get_text() yields decoded text, so entities such as "&amp;" are
        # already plain characters; links without text are skipped.
        movie_title = anchor.get_text(strip=True)
        if movie_title:
            lines.append(movie_title)

    message.reply_text(''.join(line + '\n' for line in lines))
| 65 | + |
| 66 | + |
def name(update, context):
    """Send the IMDb search results for a movie when /name is issued.

    The text following the command is passed to ``get_info``; each returned
    entry (a list of text lines) is sent as one paragraph of the reply.

    Args:
        update: Incoming update; ``update.message.text`` holds the command and
            the reply is sent through ``update.message.reply_text``.
        context: Callback context supplied by the dispatcher (unused).
    """
    message = update.message

    # Split on whitespace rather than slicing a fixed-length prefix, so a
    # missing argument, extra spaces or a "/name@botname" form are handled.
    parts = (message.text or '').split(None, 1)
    movie = parts[1].strip() if len(parts) > 1 else ''
    if not movie:
        message.reply_text('Please send /name movie_name, for example /name inception')
        return

    logger.info('Movie lookup: %s', movie)
    try:
        results = get_info(movie)
    except requests.RequestException:
        logger.exception('IMDb lookup failed for %r', movie)
        message.reply_text('Sorry, IMDb could not be reached. Try again later')
        return

    # Skip entries that carry no lines so they do not add blank paragraphs.
    paragraphs = ['\n'.join(entry) for entry in results if entry]
    if not paragraphs:
        # An empty message text cannot be sent, so answer explicitly instead.
        message.reply_text('Sorry, no results found. Try again')
        return

    message.reply_text('\n\n'.join(paragraphs))
| 78 | + |
| 79 | + |
def error(update, context):
    """Log, at warning level, the error raised while handling an update."""
    cause = context.error
    logger.warning('Update "%s" caused error "%s"', update, cause)
| 83 | + |
| 84 | + |
def _parse_title_page(page_html, page_url):
    """Build the [title, genres, rating, link] text lines for one title page."""
    soup = BeautifulSoup(page_html, "html.parser")

    title_tag = soup.find('title')
    heading = title_tag.get_text() if title_tag is not None else ''
    movie_title = heading.replace('- IMDb', ' ')

    # dict.fromkeys keeps first-seen order while dropping repeated genre
    # links; links without text are discarded.
    genres = dict.fromkeys(
        link.get_text(strip=True)
        for link in soup.find_all('a', href=re.compile(r'^/search/title\?genres='))
    )
    genres.pop('', None)
    genre_string = "Genre : " + ' '.join(genres)

    # Start from an explicit default for every page so a page without a
    # rating never reuses a value scraped from a previous movie.
    rating_string = "IMDb Rating : N/A"
    for strong in soup.find_all('strong'):
        found = re.match(r'(.*?) based', strong.get('title') or '')
        if found:
            rating_string = "IMDb Rating : " + found.group(1)

    return [movie_title, genre_string, rating_string,
            "For more details : " + page_url]


def get_info(movie, max_results=3):
    """Scrape IMDb and get the genre and rating of matching movies.

    Runs an IMDb "find" query, follows the first distinct ``/title/<id>/``
    links in the result page and scrapes each title page.

    Args:
        movie: Free-text movie name to search for.
        max_results: Maximum number of title pages to scrape (default 3).

    Returns:
        A list with one entry per movie; each entry is a list of four
        strings: page title, "Genre : ...", "IMDb Rating : ..." and
        "For more details : <url>". Empty when the query is blank,
        ``max_results`` is not positive, or no title links are found.

    Raises:
        requests.RequestException: If the search request itself fails.
            Failures on individual title pages are logged and skipped.
    """
    query = (movie or '').strip()
    if not query or max_results <= 0:
        return []

    # params= URL-encodes the query (spaces, "&", ...); the timeout keeps a
    # stalled connection from blocking the caller indefinitely.
    search = requests.get('https://www.imdb.com/find',
                          params={'q': query, 'ref_': 'nv_sr_sm'}, timeout=10)
    soup = BeautifulSoup(search.text, "html.parser")

    results = []
    seen_urls = set()
    for anchor in soup.find_all('a', href=True):
        if len(results) >= max_results:
            break
        # Read the link target from the href attribute itself rather than
        # from a textual rendering of the tag.
        found = re.search(r'/title/(.*?)/', anchor['href'])
        if found is None:
            continue
        page_url = 'https://www.imdb.com' + found.group(0)
        # Several links can point at the same title; visit each page once.
        if page_url in seen_urls:
            continue
        seen_urls.add(page_url)
        try:
            page = requests.get(page_url, timeout=10)
        except requests.RequestException:
            logger.warning('Skipping %s: request failed', page_url, exc_info=True)
            continue
        results.append(_parse_title_page(page.text, page_url))
    return results
| 136 | + |
| 137 | + |
def main():
    """Start the bot."""
    # use_context=True selects the context-based callback signature.
    updater = Updater(TOKEN, use_context=True)
    dispatcher = updater.dispatcher

    # Command name -> callback, registered in this order.
    command_callbacks = (
        ("start", start),
        ("help", help),
        ("name", name),
        ("genre", genre),
    )
    for command, callback in command_callbacks:
        dispatcher.add_handler(CommandHandler(command, callback))

    # Log all errors.
    dispatcher.add_error_handler(error)

    # start_polling() is non-blocking; idle() then keeps the bot running
    # until Ctrl-C or SIGINT/SIGTERM/SIGABRT and stops it gracefully.
    updater.start_polling()
    updater.idle()
| 163 | + |
| 164 | + |
# Start the bot only when this file is run as a script.
if __name__ == '__main__':
    main()
0 commit comments