Program
function []=lang(X, Y, x0)
    // Lagrange interpolation: X = nodes, Y = data values, x0 = evaluation point
    n = length(X)
    p = 0
    x = poly(0, 'x')            // polynomial variable x
    for i = 1:n
        l = 1                   // build the i-th Lagrange basis polynomial
        for j = [1:i-1, i+1:n]
            l = l * (x - X(j)) / (X(i) - X(j))
        end
        p = p + l * Y(i)        // add the weighted basis polynomial
    end
    disp(p, 'Lagrange poly =')
    printf('value of poly @ %.4f = %.4f\n', x0, horner(p, x0))
endfunction
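The function builds the interpolating polynomial in the standard Lagrange form: each data point (X_i, Y_i) contributes one basis polynomial that equals 1 at X_i and 0 at every other node, so p(X_i) = Y_i exactly:

p(x) = \sum_{i=1}^{n} Y_i \prod_{j \neq i} \frac{x - X_j}{X_i - X_j}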
Output:
X=[-1 1 4 6]
X =
-1. 1. 4. 6.
Y=[-4 8 -41 78]
Y =
-4. 8. -41. 78.
lang(X,Y,2)
Lagrange poly =
 17.685714 + 3.1952381x - 15.685714x^2 + 2.8047619x^3
value of poly @ 2.0000 = -16.2286
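As a quick cross-check (a sketch, not part of the original Scilab answer), the same interpolating polynomial can be reproduced in Python with NumPy, using the data points from the run above:

import numpy as np

X = [-1, 1, 4, 6]
Y = [-4, 8, -41, 78]

# Fitting a degree-3 polynomial to 4 points gives the exact interpolant
coeffs = np.polyfit(X, Y, deg=len(X) - 1)   # coefficients, highest power first
p = np.poly1d(coeffs)
print(coeffs)   # approx. [2.8048, -15.6857, 3.1952, 17.6857]
print(p(2))     # approx. -16.2286, matching horner(p, 2) above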
s16
Q2) Consider any text paragraph. Preprocess the text to remove any special characters and digits. Generate the summary using the extractive summarization process.
Ans:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from heapq import nlargest

# Download the required NLTK resources
nltk.download('punkt')
nltk.download('stopwords')

# Sample text paragraph; any text can be used here
text = ("Natural language processing (NLP) is a subfield of linguistics, computer science, information "
        "engineering, and artificial intelligence concerned with the interactions between computers and human "
        "languages, in particular how to program computers to process and analyze large amounts of natural "
        "language data. Challenges in natural language processing frequently involve speech recognition, natural "
        "language understanding, and natural language generation. The history of natural language processing "
        "generally started in the 1950s, although work can be found from earlier periods.")

# Tokenize the text into sentences first (sentence splitting needs the punctuation)
sentences = sent_tokenize(text)

# Remove special characters and digits
clean_text = re.sub('[^a-zA-Z]', ' ', text)

# Tokenize the cleaned text into words and remove stop words
stop_words = set(stopwords.words('english'))
words = [word.lower() for word in word_tokenize(clean_text) if word.lower() not in stop_words]

# Calculate word frequency
word_freq = nltk.FreqDist(words)

# Calculate sentence scores based on word frequency
sentence_scores = {}
for sentence in sentences:
    for word in word_tokenize(sentence.lower()):
        if word in word_freq:
            if len(sentence.split(' ')) < 30:
                if sentence not in sentence_scores:
                    sentence_scores[sentence] = word_freq[word]
                else:
                    sentence_scores[sentence] += word_freq[word]

# Generate the summary by selecting the top 3 sentences with the highest scores
summary_sentences = nlargest(3, sentence_scores, key=sentence_scores.get)
summary = ' '.join(summary_sentences)
print(summary)
s17
Q.2) Consider the text paragraph: "So, keep working. Keep striving. Never give up. Fall down seven times, get up eight. Ease is a greater threat to progress than hardship. Ease is a greater threat to progress than hardship. So, keep moving, keep growing, keep learning. See you at work." Preprocess the text to remove any special characters and digits. Generate the summary using the extractive summarization process.
Ans:
import re
from nltk.tokenize import sent_tokenize

# Text paragraph
text = ("So, keep working. Keep striving. Never give up. Fall down seven times, get up eight. Ease is a "
        "greater threat to progress than hardship. Ease is a greater threat to progress than hardship. So, keep "
        "moving, keep growing, keep learning. See you at work.")

# Tokenize the sentences first (sentence splitting needs the punctuation)
sentences = sent_tokenize(text)

# Remove special characters and digits from each sentence
sentences = [re.sub('[^A-Za-z]+', ' ', sentence).strip() for sentence in sentences]

# Calculate the score of each sentence based on its number of words;
# sentences with more words get a higher score
scores = {}
for sentence in sentences:
    words = sentence.split()
    scores[sentence] = len(words)

# Sort the sentences based on their scores
sorted_sentences = sorted(scores.items(), key=lambda x: x[1], reverse=True)

# Extract the top 2 sentences with the highest scores as the summary
summary_sentences = [sentence[0] for sentence in sorted_sentences[:2]]
summary = ' '.join(summary_sentences)

# Print the summary
print(summary)
s18
Q.2) Consider any text paragraph. Remove the stopwords. Tokenize the paragraph to extract words and sentences. Calculate the word frequency distribution and plot the frequencies. Plot the wordcloud of the text.
Ans:
# Install the libraries (run once, e.g. in a notebook)
!pip install nltk matplotlib wordcloud

# Import the necessary modules
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.probability import FreqDist
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Download the stopwords corpus and the tokenizer data
nltk.download('stopwords')
nltk.download('punkt')

# Define the text paragraph
text = ("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed tristique ante et velit vestibulum, vel "
        "pharetra orci iaculis. Nullam mattis risus quis augue tincidunt rhoncus. Morbi varius, arcu vitae "
        "scelerisque laoreet, magna est imperdiet quam, sit amet ultrices lectus justo id enim. Sed dictum suscipit "
        "commodo. Sed maximus consequat risus, nec pharetra nibh interdum quis. Etiam eget quam vel augue "
        "dictum dignissim sit amet nec elit. Nunc at sapien dolor. Nulla vitae iaculis lorem. Suspendisse potenti. "
        "Sed non ante turpis. Morbi consectetur, arcu a vestibulum suscipit, mauris eros convallis nibh, nec "
        "feugiat orci enim sit amet enim. Aliquam erat volutpat. Etiam vel nisi id neque viverra dapibus non non "
        "lectus.")

# Tokenize the paragraph to extract words and sentences
words = word_tokenize(text.lower())
sentences = sent_tokenize(text)

# Remove the stopwords from the extracted words
stop_words = set(stopwords.words('english'))
filtered_words = [word for word in words if word.casefold() not in stop_words]

# Calculate the word frequency distribution and plot the frequencies using matplotlib
fdist = FreqDist(filtered_words)
fdist.plot(30, cumulative=False)
plt.show()

# Build the wordcloud of the text
wordcloud = WordCloud(width=800, height=800,
                      background_color='white',
                      stopwords=stop_words,
                      min_font_size=10).generate(text)

# Plot the WordCloud image
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()
s19
Q.2) Download the movie_review.csv dataset from Kaggle using the following link: https://www.kaggle.com/nltkdata/movie-review/version/3?select=movie_review.csv. Perform sentiment analysis on the dataset and create a wordcloud.
Ans:
import pandas as pd
from textblob import TextBlob
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv('movie_review.csv')

# Add a column for sentiment analysis using TextBlob
# ('Review' is assumed to be the column holding the review text;
# adjust it to the actual column name in the CSV)
df['Sentiment'] = df['Review'].apply(lambda x: TextBlob(x).sentiment.polarity)

# Create a new dataframe for positive reviews only
pos_df = df[df['Sentiment'] > 0.2]

# Create a wordcloud for positive reviews
wordcloud = WordCloud(width=800, height=800,
                      background_color='white',
                      stopwords=STOPWORDS,
                      min_font_size=10).generate(' '.join(pos_df['Review']))

# Plot the wordcloud
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()
s20
Q.2) Consider the text paragraph: """Hello all, Welcome to Python Programming Academy. Python Programming Academy is a nice platform to learn new programming skills. It is difficult to get enrolled in this Academy.""" Remove the stopwords.
Ans:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')

# Text paragraph
text = ("Hello all, Welcome to Python Programming Academy. Python Programming Academy is a nice "
        "platform to learn new programming skills. It is difficult to get enrolled in this Academy.")

# Tokenize the text
tokens = nltk.word_tokenize(text)

# Remove stopwords
stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]

# Print the filtered tokens
print(filtered_tokens)
s21
Q.2) Build a simple linear regression model for User Data.
Ans:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# 1. Collect data ('user_data.csv' is assumed to contain 'age' and 'income' columns)
data = pd.read_csv('user_data.csv')

# 2. Preprocess data
data.dropna(inplace=True)
X = data['age'].values.reshape(-1, 1)
y = data['income'].values.reshape(-1, 1)

# 3. Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# 4. Train the model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# 5. Predict values
y_pred = regressor.predict(X_test)

# 6. Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean squared error: ", mse)
print("R-squared: ", r2)

# 7. Visualize the results
plt.scatter(X_test, y_test, color='gray')
plt.plot(X_test, y_pred, color='red', linewidth=2)
plt.show()
s22
Q2) Consider any text paragraph. Remove the stopwords.
Ans:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')

# Sample text paragraph
text = ("Hello all, Welcome to Python Programming Academy. Python Programming Academy is a nice "
        "platform to learn new programming skills. It is difficult to get enrolled in this Academy.")

# Tokenize the text paragraph
words = word_tokenize(text)

# Define the stopwords
stop_words = set(stopwords.words('english'))

# Remove the stopwords
filtered_words = [word for word in words if word.casefold() not in stop_words]

# Join the filtered words to form a sentence
filtered_sentence = ' '.join(filtered_words)
print(filtered_sentence)
s23
Q2) Consider any text paragraph. Preprocess the text to remove any special characters and digits.
Ans:
import re

text = "Hello, #world123! This is a sample text paragraph. It contains special characters and 5 digits."

# Remove special characters and digits
processed_text = re.sub(r'[^a-zA-Z\s]', '', text)
print(processed_text)
s24
Q.2) Consider the following dataset:
https://www.kaggle.com/datasets/datasnaek/youtubenew?select=INvideos.csv
Write a Python script for the following:
i. Read the dataset and perform data cleaning operations on it.
ii. Find the total views, total likes, total dislikes and comment count.
Ans:
import pandas as pd

# Read the dataset
df = pd.read_csv('INvideos.csv')

# Drop the columns that are not required
df = df.drop(['video_id', 'trending_date', 'channel_title', 'category_id', 'publish_time', 'tags',
              'thumbnail_link', 'comments_disabled', 'ratings_disabled', 'video_error_or_removed'], axis=1)

# Drop rows with missing values in the numeric columns (basic cleaning)
df = df.dropna(subset=['views', 'likes', 'dislikes', 'comment_count'])

# Convert the datatype of 'views', 'likes', 'dislikes', and 'comment_count' to integer
df[['views', 'likes', 'dislikes', 'comment_count']] = df[['views', 'likes', 'dislikes',
                                                          'comment_count']].astype(int)

# Find the total views, likes, dislikes, and comment count
total_views = df['views'].sum()
total_likes = df['likes'].sum()
total_dislikes = df['dislikes'].sum()
total_comments = df['comment_count'].sum()

print('Total Views:', total_views)
print('Total Likes:', total_likes)
print('Total Dislikes:', total_dislikes)
print('Total Comments:', total_comments)
s25
Q.2) Consider the following dataset:
https://www.kaggle.com/datasets/seungguini/youtube-commentsfor-covid19-relatedvideos?select=covid_2021_1.csv
Write a Python script for the following:
i. Read the dataset and perform data cleaning operations on it.
ii. Tokenize the comments into words.
iii. Perform sentiment analysis and find the percentage of positive, negative and neutral comments.
Ans:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Read the dataset
df = pd.read_csv('covid_2021_1.csv')

# Remove null values and duplicates
# ('Comment' is assumed to be the column holding the comment text)
df.dropna(inplace=True)
df.drop_duplicates(subset='Comment', inplace=True)

# Tokenize the comments into words
nltk.download('punkt')
df['tokens'] = df['Comment'].apply(nltk.word_tokenize)

# Perform sentiment analysis
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()
df['sentiment'] = df['Comment'].apply(lambda x: sia.polarity_scores(x)['compound'])

# Calculate the percentage of positive, negative, and neutral comments
total_comments = len(df)
positive_comments = len(df[df['sentiment'] > 0])
negative_comments = len(df[df['sentiment'] < 0])
neutral_comments = len(df[df['sentiment'] == 0])
positive_percentage = (positive_comments / total_comments) * 100
negative_percentage = (negative_comments / total_comments) * 100
neutral_percentage = (neutral_comments / total_comments) * 100

# Print the results
print('Total Comments:', total_comments)
print('Positive Comments:', positive_comments, '(', positive_percentage, '%)')
print('Negative Comments:', negative_comments, '(', negative_percentage, '%)')
print('Neutral Comments:', neutral_comments, '(', neutral_percentage, '%)')
s26
Q.2) Consider the text paragraph: """Hello all, Welcome to Python Programming Academy. Python Programming Academy is a nice platform to learn new programming skills. It is difficult to get enrolled in this Academy.""" Preprocess the text to remove any special characters and digits. Generate the summary using the extractive summarization process.
Ans:
import re
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Text to summarize
text = ("Hello all, Welcome to Python Programming Academy. Python Programming Academy is a nice "
        "platform to learn new programming skills. It is difficult to get enrolled in this Academy.")

# Tokenize the text into sentences first (sentence splitting needs the punctuation)
sentences = sent_tokenize(text)

# Preprocess each sentence to remove special characters and digits
preprocessed_sentences = [re.sub(r'[^a-zA-Z\s]', '', sentence) for sentence in sentences]

# Calculate the importance score of each sentence using TF-IDF:
# a sentence's score is its total cosine similarity to the other sentences
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_sentences)
similarity_matrix = cosine_similarity(tfidf_matrix)
scores = similarity_matrix.sum(axis=1)

# Select the top N sentences based on their importance score
N = 2
top_sentences = sorted(range(len(sentences)), key=lambda i: scores[i], reverse=True)[:N]

# Concatenate the top sentences (in original order) to form the summary
summary = ' '.join(sentences[i] for i in sorted(top_sentences))
print(summary)
s27
Q.2) Create your own transactions dataset and apply the above process on your dataset.
Ans:
import random
import csv

# Generate random transaction data
transactions = []
for i in range(1, 101):
    transaction_id = i
    transaction_date = f"2022-05-{random.randint(1, 31):02d}"
    customer_id = random.randint(1, 10)
    item_id = random.choice(["A", "B", "C"])
    item_price = round(random.uniform(10.0, 100.0), 2)
    quantity = random.randint(1, 10)
    transactions.append([transaction_id, transaction_date, customer_id, item_id, item_price, quantity])

# Save the data to a CSV file
with open('transactions.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Transaction ID", "Transaction Date", "Customer ID", "Item ID", "Item Price", "Quantity"])
    for transaction in transactions:
        writer.writerow(transaction)

import pandas as pd

# Read the CSV file into a Pandas DataFrame
df = pd.read_csv('transactions.csv')

# Convert the "Item Price" column to numeric type
df['Item Price'] = pd.to_numeric(df['Item Price'])

# Calculate the sales amount for each transaction
df['Sales'] = df['Item Price'] * df['Quantity']

# Group the transactions by customer ID and calculate the total sales per customer
total_sales = df.groupby('Customer ID')['Sales'].sum().reset_index()

# Print the results
print(total_sales)
s28
Q.2) Build a simple linear regression model for the Car dataset.
Ans:
from sklearn.linear_model import LinearRegression

# Sample car data: mileage vs. price (illustrative values)
mileage = [[10], [20], [30], [40], [50], [60], [70], [80]]
price = [24, 19, 17, 13, 10, 7, 5, 2]

# Fit the simple linear regression model
reg = LinearRegression().fit(mileage, price)
print('Intercept:', reg.intercept_)
print('Coefficient:', reg.coef_[0])

# Predict prices for new mileage values
new_mileage = [[25], [45], [65]]
predicted_price = reg.predict(new_mileage)
print('Predicted prices:', predicted_price)
s29
Q.2) Build a logistic regression model for the Student Score dataset.
Ans:
# Import the necessary libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset
data = pd.read_csv('student_scores.csv')

# Split the data into input and output variables
# (the last column is assumed to be a categorical label, e.g. pass/fail)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Create the logistic regression model and fit it to the training data
classifier = LogisticRegression()
classifier.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = classifier.predict(X_test)

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
s30
Q.2) Create the dataset transactions = [['eggs', 'milk', 'bread'], ['eggs', 'apple'], ['milk', 'bread'], ['apple', 'milk'], ['milk', 'apple', 'bread']]. Convert the categorical values into numeric format. Apply the apriori algorithm on the above dataset to generate the frequent itemsets and association rules.
Ans:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

transactions = [['eggs', 'milk', 'bread'], ['eggs', 'apple'], ['milk', 'bread'], ['apple', 'milk'],
                ['milk', 'apple', 'bread']]

# Map each item to a unique numeric value and convert the categorical values
item_to_num = {'eggs': 1, 'milk': 2, 'bread': 3, 'apple': 4}
numeric_transactions = [[item_to_num[item] for item in transaction] for transaction in transactions]
print(numeric_transactions)

# mlxtend's apriori expects a one-hot encoded (0/1) DataFrame,
# so encode the transactions before mining
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)

# Generate frequent itemsets with a minimum support of 0.4
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)

# Generate association rules with a minimum confidence of 0.7
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

print(frequent_itemsets)
print(rules)