1. Regula Falsi method (for loop) - see the short sketch after this list
2. Regula Falsi method (while loop)
3. Newton-Raphson method (for loop)
4. Newton-Raphson method (while loop)
5. Trapezoidal rule
6. Simpson's 1/3 rule
7. Simpson's 3/8 rule
8. Backward difference formula
9. Forward difference formula
10. Lagrange interpolation formula
11. Divided difference formula
12. Euler's formula
13. Runge-Kutta 2nd order
14. Runge-Kutta 4th order
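For quick reference, here is a minimal Python sketch of item 1, the Regula Falsi (false position) method driven by a for loop. The equation f(x) = x^3 - 2x - 5, the bracket [2, 3], and the iteration count are illustrative assumptions, not values fixed by the list above.

# Regula Falsi (false position) with a fixed number of for-loop iterations.
# The equation, bracket, and iteration count are example values only.
def f(x):
    return x**3 - 2*x - 5

def regula_falsi(a, b, iterations=20):
    if f(a) * f(b) > 0:
        raise ValueError("f(a) and f(b) must have opposite signs")
    c = a
    for _ in range(iterations):
        # x-intercept of the chord joining (a, f(a)) and (b, f(b))
        c = (a * f(b) - b * f(a)) / (f(b) - f(a))
        if f(a) * f(c) < 0:
            b = c
        else:
            a = c
    return c

print(regula_falsi(2, 3))  # approximates the root near x = 2.0946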
7 Comments
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.probability import FreqDist
from heapq import nlargest
nltk.download('punkt')
nltk.download('stopwords')
def preprocess_text(text):
    # Remove special characters and digits
    processed_text = re.sub(r'[^A-Za-z\s]', '', text)
    return processed_text

def extractive_summarization(text, num_sentences=3):
    # Tokenize the text into sentences
    sentences = sent_tokenize(text)
    # Tokenize the text into words
    words = word_tokenize(text.lower())
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]
    # Calculate word frequencies
    word_freq = FreqDist(filtered_words)
    # Calculate sentence scores based on word frequencies
    sentence_scores = {}
    for sentence in sentences:
        for word in word_tokenize(sentence.lower()):
            if word in word_freq:
                if sentence not in sentence_scores:
                    sentence_scores[sentence] = word_freq[word]
                else:
                    sentence_scores[sentence] += word_freq[word]
    # Select the top N sentences with the highest scores
    summary_sentences = nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    # Join the summary sentences to form the summary
    summary = ' '.join(summary_sentences)
    return summary

if __name__ == "__main__":
    # Sample text paragraph
    text = """Data analytics is the process of examining data sets to draw conclusions about the information they contain,
increasingly with the aid of specialized systems and software. Data analytics technologies and techniques are widely used
in commercial industries to enable organizations to make more-informed business decisions. Scientists and researchers also
use analytics tools to verify or disprove scientific models, theories and hypotheses."""
    # Preprocess the text
    processed_text = preprocess_text(text)
    print("Processed Text:\n", processed_text)
    # Generate summary using extractive summarization
    summary = extractive_summarization(processed_text)
    print("\nSummary:\n", summary)
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.probability import FreqDist
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
def preprocess_text(text):
    # Remove special characters and digits
    processed_text = re.sub(r'[^A-Za-z\s]', '', text)
    return processed_text

def remove_stopwords(words):
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word.lower() not in stop_words]
    return filtered_words

def plot_word_frequencies(freq_dist):
    plt.figure(figsize=(10, 6))
    freq_dist.plot(30, cumulative=False)
    plt.show()

def plot_wordcloud(text):
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    plt.figure(figsize=(10, 6))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

if __name__ == "__main__":
    # Sample text paragraph
    text = """Data analytics is the process of examining data sets to draw conclusions about the information they contain,
increasingly with the aid of specialized systems and software. Data analytics technologies and techniques are widely used
in commercial industries to enable organizations to make more-informed business decisions. Scientists and researchers also
use analytics tools to verify or disprove scientific models, theories and hypotheses."""
    # Preprocess the text
    processed_text = preprocess_text(text)
    # Tokenize the paragraph to extract words and sentences
    words = word_tokenize(processed_text)
    sentences = sent_tokenize(text)
    # Remove stopwords
    filtered_words = remove_stopwords(words)
    # Calculate the word frequency distribution
    freq_dist = FreqDist(filtered_words)
    # Print results
    print("Processed Text:\n", processed_text)
    print("\nWords:\n", words)
    print("\nSentences:\n", sentences)
    print("\nFiltered Words:\n", filtered_words)
    print("\nWord Frequency Distribution:\n", freq_dist)
    # Plot word frequencies
    plot_word_frequencies(freq_dist)
    # Plot wordcloud
    plot_wordcloud(processed_text)
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Download the VADER lexicon
nltk.download('vader_lexicon')
def sentiment_analysis(review):
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = sid.polarity_scores(review)
    sentiment = 'Neutral'
    if sentiment_scores['compound'] >= 0.05:
        sentiment = 'Positive'
    elif sentiment_scores['compound'] <= -0.05:
        sentiment = 'Negative'
    return sentiment, sentiment_scores

if __name__ == "__main__":
    # Review messages
    reviews = [
        "I purchased headphones online. I am very happy with the product.",
        "I saw the movie yesterday. The animation was really good but the script was ok.",
        "I enjoy listening to music",
        "I take a walk in the park everyday"
    ]
    # Perform sentiment analysis on each review message
    for review in reviews:
        sentiment, scores = sentiment_analysis(review)
        print(f"Review: {review}")
        print(f"Sentiment: {sentiment}")
        print(f"Sentiment Scores: {scores}\n")
import re
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
def read_whatsapp_chat(file_path):
    # Read the exported chat file from the given path
    with open(file_path, 'r', encoding='utf-8') as file:
        data = file.read()
    return data

def preprocess_text(text):
    # Remove special characters and digits
    processed_text = re.sub(r'[^A-Za-z\s]', '', text)
    return processed_text

def remove_stopwords_and_lemmatize(words):
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    filtered_words = [lemmatizer.lemmatize(word.lower()) for word in words if word.lower() not in stop_words]
    return filtered_words

def plot_wordcloud(text):
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    plt.figure(figsize=(10, 6))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

if __name__ == "__main__":
    # File path to the exported WhatsApp chat .txt file
    file_path = "path/to/whatsapp_chat.txt"
    # Step i: Read the exported WhatsApp chat .txt file
    chat_data = read_whatsapp_chat(file_path)
    # Step ii: Tokenize the read data into sentences
    sentences = sent_tokenize(chat_data)
    print("Tokenized Sentences:\n", sentences)
    # Tokenize the data into words
    words = word_tokenize(chat_data)
    # Step iii: Remove stopwords and perform lemmatization
    processed_text = preprocess_text(chat_data)
    filtered_words = remove_stopwords_and_lemmatize(word_tokenize(processed_text))
    # Print the processed words
    print("\nFiltered and Lemmatized Words:\n", filtered_words)
    # Join the filtered words for wordcloud generation
    filtered_text = ' '.join(filtered_words)
    # Step iv: Plot the wordcloud for the given data
    plot_wordcloud(filtered_text)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud
# Download necessary NLTK resources
nltk.download('stopwords')
# Function to preprocess text by removing stopwords
def preprocess_text(text):
    stop_words = set(stopwords.words('english'))
    words = text.split()
    filtered_words = [word for word in words if word.lower() not in stop_words]
    return ' '.join(filtered_words)

def convert_to_numeric(value):
    # Remove any commas and convert to float
    value = value.replace(',', '')
    if 'M' in value:
        return float(value.replace('M', '')) * 1_000_000
    elif 'K' in value:
        return float(value.replace('K', '')) * 1_000
    return float(value)
# Load the dataset
#url = 'https://www.kaggle.com/datasets/prasertk/top-1000-instagram-influencers/download'
df = pd.read_csv('top_1000_instagrammers.csv')
# Convert the 'Followers' and 'Authentic Engagement' columns to numeric
df['Followers'] = df['Followers'].apply(convert_to_numeric)
df['Authentic Engagement'] = df['Authentic Engagement'].apply(convert_to_numeric)
# Inspect column names
print("Column names:\n", df.columns)
# Inspect the first few rows of the dataset
print("\nFirst few rows of the dataset:\n", df.head())
# i. Find the top 5 Instagram influencers from India
top_5_india = df[df['Audience Country'] == 'India'].nlargest(5, 'Followers')
print("\nTop 5 Instagram influencers from India:\n", top_5_india[['Name', 'Followers']])
# ii. Find the Instagram account having the least number of followers
least_followers = df.nsmallest(1, 'Followers')
print("\nInstagram account with the least number of followers:\n", least_followers[['Name', 'Followers']])
# iii. Read the "Category" column, remove stopwords, and plot the wordcloud
categories = df['Category'].dropna().apply(preprocess_text)
all_categories_text = ' '.join(categories)
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(all_categories_text)
plt.figure(figsize=(10, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
# iv. Group the Instagram accounts category-wise
grouped_by_category = df.groupby('Category')['Name'].count().reset_index()
print("\nNumber of Instagram accounts per category:\n", grouped_by_category)
# v. Visualize the dataset and plot the relationship between Followers and Authentic engagement
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='Followers', y='Authentic Engagement')
plt.title('Relationship between Followers and Authentic Engagement')
plt.xlabel('Followers')
plt.ylabel('Authentic Engagement')
plt.show()
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
# i. Read the dataset and perform data cleaning operations on it
url = 'covid_2021_1.csv'
# Specify the correct delimiter (e.g., comma, semicolon, tab, etc.)
delimiter = ',' # Change this if needed
# Load the dataset
df = pd.read_csv(url, delimiter=delimiter, on_bad_lines='skip')
# Perform data cleaning operations
df.dropna(subset=['comment_text'], inplace=True) # Remove rows with missing comment_text
df['comment_text'] = df['comment_text'].astype(str) # Ensure all comments are strings
# Print the first few rows of the cleaned dataset
print("Cleaned Dataset:\n", df.head())
# ii. Tokenize the comments in words
df['tokenized_comments'] = df['comment_text'].apply(word_tokenize)
# Print the tokenized comments
print("\nTokenized Comments:\n", df['tokenized_comments'].head())
# iii. Perform sentiment analysis and find the percentage of positive, negative, and neutral comments
analyzer = SentimentIntensityAnalyzer()
def analyze_sentiment(text):
    sentiment_scores = analyzer.polarity_scores(text)
    if sentiment_scores['compound'] >= 0.05:
        return 'Positive'
    elif sentiment_scores['compound'] <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'
df['sentiment'] = df['comment_text'].apply(analyze_sentiment)
# Calculate the percentage of each sentiment
sentiment_counts = df['sentiment'].value_counts(normalize=True) * 100
print("\nSentiment Analysis Percentages:\n", sentiment_counts)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Dictionary of file paths for different countries
files = {
    "GB": "GBvideos.csv",
    "MX": "MXvideos.csv",
    "KR": "KRvideos.csv",
    "DE": "DEvideos.csv",
    "FR": "FRvideos.csv",
    "US": "USvideos.csv",
    "IN": "INvideos.csv",
    "RU": "RUvideos.csv",
    "JP": "JPvideos.csv",
    "CA": "CAvideos.csv"
}
# Read each CSV file into a DataFrame and store it in a dictionary
df_dict = {country: pd.read_csv(path, encoding='latin1') for country, path in files.items()}
# Perform operations on each DataFrame
for country, df in df_dict.items():
    # Drop rows with missing values
    df = df.dropna()
    # Convert 'views', 'likes', 'dislikes', and 'comment_count' columns to numeric
    df['views'] = pd.to_numeric(df['views'], errors='coerce')
    df['likes'] = pd.to_numeric(df['likes'], errors='coerce')
    df['dislikes'] = pd.to_numeric(df['dislikes'], errors='coerce')
    df['comment_count'] = pd.to_numeric(df['comment_count'], errors='coerce')
    # Print the first few rows of the cleaned dataset
    print(f"\nCleaned Dataset for {country}:\n", df.head())
    # ii. Find the total views, total likes, total dislikes, and comment count
    total_views = df['views'].sum()
    total_likes = df['likes'].sum()
    total_dislikes = df['dislikes'].sum()
    total_comment_count = df['comment_count'].sum()
    print(f"\nTotal Views for {country}:", total_views)
    print(f"Total Likes for {country}:", total_likes)
    print(f"Total Dislikes for {country}:", total_dislikes)
    print(f"Total Comment Count for {country}:", total_comment_count)
    # iii. Find the least and topmost liked and commented videos
    topmost_liked_video = df.loc[df['likes'].idxmax()]
    least_liked_video = df.loc[df['likes'].idxmin()]
    topmost_commented_video = df.loc[df['comment_count'].idxmax()]
    least_commented_video = df.loc[df['comment_count'].idxmin()]
    print(f"\nTopmost Liked Video for {country}:\n", topmost_liked_video)
    print(f"\nLeast Liked Video for {country}:\n", least_liked_video)
    print(f"\nTopmost Commented Video for {country}:\n", topmost_commented_video)
    print(f"\nLeast Commented Video for {country}:\n", least_commented_video)
    # iv. Perform year-wise statistics for views and plot the analyzed data
    df['trending_date'] = pd.to_datetime(df['trending_date'], format='%y.%d.%m')
    df['year'] = df['trending_date'].dt.year
    yearly_views = df.groupby('year')['views'].sum().reset_index()
    print(f"\nYear-wise Views for {country}:\n", yearly_views)
    plt.figure(figsize=(10, 6))
    sns.barplot(x='year', y='views', data=yearly_views)
    plt.title(f'Year-wise Total Views for {country}')
    plt.xlabel('Year')
    plt.ylabel('Total Views')
    plt.show()
    # v. Plot the viewers who reacted on videos (Likes + Dislikes)
    df['total_reactions'] = df['likes'] + df['dislikes']
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=df, x='views', y='total_reactions')
    plt.title(f'Viewers Who Reacted on Videos for {country}')
    plt.xlabel('Views')
    plt.ylabel('Total Reactions (Likes + Dislikes)')
    plt.show()