Discord bots have become an integral part of many servers, enhancing the user experience and providing a wide range of functionality. In this tutorial, we'll build a Discord bot on Node.js (written in TypeScript) that answers messages using the Gemini Pro API and uses the Gemini Pro Vision model when a message includes image attachments.

Prerequisites

Before we dive into the code, make sure you have the following:

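  • Node.js installed on your machine.
  • A Discord bot token. You can create one by following the Discord Developer Portal guide.
  • A Gemini API key. You can obtain one by signing up for the Gemini API (for example, through Google AI Studio).
  • The npm packages used by the code below installed in your project: discord.js, axios, and @google/generative-ai.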




// Import necessary modules
import { GoogleGenerativeAI } from '@google/generative-ai';
import axios from 'axios';
import { Client, GatewayIntentBits, REST } from 'discord.js';

// Your config file with Discord bot token and Gemini API key
import config from './config.json';

// Discord gateway client. The type is inferred from `new Client(...)` rather
// than annotated as `any`, so calls such as `client.on` and `client.login`
// are checked against the discord.js typings.
// Intents requested: guild events, guild messages, and message content (the
// message handler in this file reads `message.content`).
const client = new Client({
  intents: [
    GatewayIntentBits.Guilds,
    GatewayIntentBits.GuildMessages,
    GatewayIntentBits.MessageContent,
  ],
});

// REST client authenticated with the bot token. Nothing in the visible code
// uses it; it is kept in case other code relies on it.
const rest = new REST({ version: '10' }).setToken(`${config.DISCORD_BOT_TOKEN}`);

// Reply to every non-bot message that has text content with a Gemini answer.
// All Discord calls are awaited inside a try/catch so that a failed reply or
// delete is logged instead of surfacing as an unhandled promise rejection.
client.on('messageCreate', async (message: any) => {
  // Ignore bot messages and messages without text (an empty string is falsy)
  if (message.author.bot || !message.content) return;

  try {
    // Send a "thinking" message
    const thinkingMessage = await message.reply('Thinking...');

    // Call the Discord Gemini AI function
    const response = await discordGeminiAI(message);

    // Remove the "thinking" message; a failure here must not block the answer
    try {
      await thinkingMessage.delete();
    } catch (error: unknown) {
      console.error('Error deleting "Thinking..." message:', error);
    }

    // Reply with the actual response
    await message.reply(response);
  } catch (error: unknown) {
    console.error('Error handling message:', error);
  }
});


// Function to interact with Gemini AI.
//
// Sends the message text, plus any image attachments, to Gemini and resolves
// to the reply text cut to at most 2000 characters (Discord's message length
// limit). It never rejects for download or generation failures: those are
// logged and the string 'Error generating content.' is returned instead.
//
// message: the Discord message; `content` is used as the prompt and
//          `attachments` (optional, anything exposing `map`) supplies images.
// returns: the text to send back to the user.
const discordGeminiAI = async (message: any): Promise<string> => {
  // Copy the attachments into a plain array; tolerate the property being absent.
  const attachments: any[] = message.attachments?.map((attachment: any) => attachment) ?? [];

  // Only image attachments are forwarded, so a non-image file does not force
  // the vision model.
  const imageAttachments = attachments.filter(
    (attachment: any) =>
      typeof attachment?.contentType === 'string' && attachment.contentType.startsWith('image/'),
  );

  // The vision model is used only when there is at least one image to send.
  const modelName = imageAttachments.length > 0 ? 'gemini-pro-vision' : 'gemini-pro';

  // Initialize GoogleGenerativeAI with API key
  const genAI = new GoogleGenerativeAI(`${config.GEMINI_API_KEY}`);

  // Get the generative model
  const model = genAI.getGenerativeModel({ model: modelName });

  // imageUrlToBase64 returns "data:<type>;base64,<payload>"; the SDK wants the
  // bare payload, so everything up to and including this marker is stripped.
  const base64Marker = ';base64,';

  try {
    // Download all images in parallel. Promise.all keeps the results in
    // attachment order, unlike pushing into a shared array from each callback.
    const imageParts = await Promise.all(
      imageAttachments.map(async (attachment: any) => {
        const dataUrl: string = await imageUrlToBase64(
          attachment.url ?? attachment.attachment,
          attachment.contentType,
        );
        const markerIndex = dataUrl.indexOf(base64Marker);
        const data =
          markerIndex === -1 ? dataUrl : dataUrl.slice(markerIndex + base64Marker.length);

        // Images must be sent as inlineData parts; a plain string would be
        // treated as prompt text rather than as an image.
        return { inlineData: { data, mimeType: attachment.contentType } };
      }),
    );

    // Generate content using prompt and images
    const result = await model.generateContent([message.content, ...imageParts]);
    const response = await result.response;
    const text: string = response.text();

    // An empty reply cannot be sent as a Discord message; return a placeholder.
    if (!text) {
      return 'No response generated.';
    }

    // Keep the reply within the 2000-character limit, then log and return it.
    const reply = text.slice(0, 2000);
    console.log(reply);
    return reply;
  } catch (error: unknown) {
    console.error('Error generating content:', error);
    return 'Error generating content.';
  }
};

// Download the resource at `url` and return it as a base64 data URL of the
// form "data:<contentType>;base64,<payload>".
//
// url:         location to fetch with axios (requested as an arraybuffer).
// contentType: media type written into the data URL prefix.
// Any failure is logged and then rethrown to the caller.
const imageUrlToBase64 = async (url: string, contentType: string) => {
  try {
    const { data } = await axios.get(url, { responseType: 'arraybuffer' });
    const encoded = Buffer.from(data, 'binary').toString('base64');
    return 'data:' + contentType + ';base64,' + encoded;
  } catch (err) {
    console.error('Error converting image to base64:', err);
    throw err;
  }
};

// Run the bot. login() returns a promise; a rejection (for example an invalid
// token) is logged and reflected in the process exit code instead of being
// left as an unhandled rejection.
client.login(`${config.DISCORD_BOT_TOKEN}`).catch((error: unknown) => {
  console.error('Failed to log in to Discord:', error);
  process.exitCode = 1;
});

Conclusion
Congratulations! You've created a Discord bot with Gemini AI Pro + Vision capabilities using Node.js. Feel free to customize the code further based on your needs and explore additional features provided by Discord and Gemini AI.