Sentiment analysis in Python

We need to use the Twitter bot (bot.py); you can check my GitHub for the source code.

https://github.com/flavves/sentiment-analysis-twitter

# -*- coding: utf-8 -*-
"""
Created on Mon Apr 19 22:09:32 2021

@author: yazılım
"""

import selenium
from  selenium import webdriver
import pyautogui
import time



# Launch Chrome via a chromedriver binary on PATH / in the working directory.
# NOTE(review): `executable_path` and the find_element_by_xpath helpers below
# are the Selenium 3 API and were removed in Selenium 4 — confirm the
# installed selenium version before running.
driver_path = "chromedriver"
driver = webdriver.Chrome(executable_path=driver_path)

driver.get("https://twitter.com/login")
# user login ("mail" / "sifre" are placeholders — fill in real credentials)

# The absolute XPaths below are brittle: they break whenever Twitter changes
# its page layout.
driver.find_element_by_xpath('//*[@id="react-root"]/div/div/div[2]/main/div/div/div[2]/form/div/div[1]/label/div/div[2]/div/input').send_keys("mail")
driver.find_element_by_xpath('//*[@id="react-root"]/div/div/div[2]/main/div/div/div[2]/form/div/div[2]/label/div/div[2]/div/input').send_keys("sifre")
driver.find_element_by_xpath('//*[@id="react-root"]/div/div/div[2]/main/div/div/div[2]/form/div/div[3]/div').click()
# login done

# search for data: live tweets matching the query "akşam"
driver.get("https://twitter.com/search?q=ak%C5%9Fam&src=typed_query&f=live")


time.sleep(4)

# switch to the "Latest" tab of the search results
enson=driver.find_element_by_xpath('//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[1]/div[2]/nav/div/div[2]/div/div[2]/a')
enson.click()
time.sleep(4)





SCROLL_PAUSE_TIME = 2
last_height = driver.execute_script("return document.body.scrollHeight")

# Collect the text of every rendered tweet, scrolling until the page height
# stops growing. The output file is opened ONCE, in UTF-8, instead of being
# reopened for every tweet; writing UTF-8 also removes the need to skip
# tweets via the old `except UnicodeEncodeError: continue` workaround.
# NOTE(review): like the original, tweets are written without an explicit
# separator — tweet text itself usually ends with newlines.
seen = set()  # tweets still visible after a scroll would otherwise be rewritten
with open("yeni3.txt", "a", encoding="utf-8") as dosya:
    while True:
        # Scroll to the bottom and give the page time to load more tweets.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(SCROLL_PAUSE_TIME)

        # Stop once the page height no longer changes (no more tweets).
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

        for tweet in driver.find_elements_by_xpath('//div[@data-testid="tweet"]'):
            text = tweet.text
            if text in seen:
                continue
            seen.add(text)
            dosya.write(text)
            print(text)

#driver.quit()




This program stops whenever you want it to — just interrupt it manually.

After that, we need to do some string and pandas processing.

I can't explain every piece of code, but if you get an error or run into problems you can e-mail me at batuhanokmen@gmail.com.

So let's start with the full code; after that I'll explain some parts. Please continue reading for all the code examples.

# -*- coding: utf-8 -*-
"""
Created on Tue Apr 20 22:20:59 2021

@author: yazılım
"""


"""

metin madenciliği v1

"""

import pandas as pd



# Load the scraped tweets (one per line) and keep the raw text in `mdf` so
# the cleaned version can be compared against it later.
# NOTE(review): the scraper wrote this file with the platform default
# encoding; if it was written as UTF-8, pass encoding="utf-8" here too.
with open("yeni3.txt", "r") as dosya:
    metinler = dosya.readlines()

mdf = pd.DataFrame(pd.Series(metinler), columns=["twitler"])
yeni_mdf = mdf.copy()

# Lowercase every word. split()/join also collapses runs of whitespace into
# single spaces. (The original lambda reused `x` for both the row and the
# word, shadowing its own argument.) Result: a Series of strings.
yeni_mdf = yeni_mdf["twitler"].apply(
    lambda text: " ".join(word.lower() for word in text.split())
)

# Strip punctuation, then digits. Raw strings avoid invalid-escape warnings
# (`"\d"` is not a valid Python escape), and regex=True must be explicit:
# pandas 2.0 changed the Series.str.replace default to regex=False, which
# would make these patterns match literally and remove nothing.
yeni_mdf = yeni_mdf.str.replace(r"[^\w\s]", "", regex=True)
yeni_mdf = yeni_mdf.str.replace(r"\d", "", regex=True)

# Stop-word removal.
yeni_mdf = pd.DataFrame(yeni_mdf, columns=["twitler"])

# NOTE(review): the original contained `!pip install nltk`, which is IPython
# shell syntax and a SyntaxError in a plain .py file — install nltk from the
# command line (`pip install nltk`) instead.
import nltk
nltk.download("stopwords")

from nltk.corpus import stopwords

# Turkish stop words plus artifacts left over from the scraped page chrome
# (timestamp suffixes like "5m"/"2h", English glue words). "replying to"
# contains a space and can never match a single token; it is kept only to
# preserve the original list.
sw = stopwords.words("turkish")
sw.extend(["m", "replying to", "h", "to", "replying", "and"])

yeni_mdf = yeni_mdf["twitler"].apply(
    lambda tweet: " ".join(tok for tok in tweet.split() if tok not in sw)
)


# Optional: frequency of every remaining word, to allow dropping rarely-used
# words later (not applied in this script).
yeni_mdf = pd.DataFrame(yeni_mdf, columns=["twitler"])

az_gecenler = pd.Series(" ".join(yeni_mdf["twitler"]).split()).value_counts()

# The bare `mdf["twitler"][0:5]` / `yeni_mdf["twitler"][0:5]` expressions and
# the unused `start = 2` were interactive-console artifacts with no effect in
# a script; to compare before/after cleaning, inspect in a REPL:
#   mdf["twitler"][0:5]       # raw tweets
#   yeni_mdf["twitler"][0:5]  # cleaned tweets

#yeni_mdf["twitler"].str.find("kötü")


# Load the lexicon of "happy" words, one word per line.
with open("mutlukelimeler.txt", "r", encoding="utf-8") as dosya:
    mutlu_kelimeler = dosya.readlines()

# The original `i[:-1]` silently chopped the last character of a final line
# that has no trailing newline; rstrip("\n") removes only the newline.
yeni_mutlu_kelimeler = [satir.rstrip("\n") for satir in mutlu_kelimeler]

# Round-trip the cleaned tweets through CSV so the labelling step works on a
# fresh DataFrame.
yeni_mdf = yeni_mdf["twitler"].dropna()
yeni_mdf.to_csv("adana.csv", encoding='utf-8', index=False)

df = pd.read_csv("adana.csv")
df = df.dropna()

# Create the label column (filled in by the labelling loop below).
df.loc[0, "status"] = ""

durumlar = ["poz", "neg"]  # the two possible labels

sayac = 1


# Label each tweet: "poz" if it contains at least one happy word, "neg"
# otherwise.
#
# The original loop ran `while 1` with no exit condition (it only stopped
# when iloc raised IndexError past the last row) and overwrote the label once
# per word, so the LAST word of the tweet decided the label. Iterating the
# rows once and using any() fixes both, and also labels row 0, which the old
# loop (starting at sayac=1) skipped.
for sayac in range(len(df)):
    kelime = df.iloc[sayac]["twitler"]
    if any(tok in yeni_mutlu_kelimeler for tok in kelime.split()):
        df.loc[sayac, "status"] = "poz"
    else:
        df.loc[sayac, "status"] = "neg"

# Label distribution — inspect in a REPL (no effect in a script).
df["status"].value_counts()

#kütüphane

from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=FutureWarning)
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import model_selection
import matplotlib.pyplot as plt
from sklearn.linear_model import RidgeCV,Lasso,Ridge,LassoCV,ElasticNet,ElasticNetCV
from sklearn.linear_model import LassoCV
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale 
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import neighbors
from sklearn.svm import SVR,SVC
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from warnings import filterwarnings
filterwarnings('ignore')
from sklearn import preprocessing
lab_enc = preprocessing.LabelEncoder()
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Keep only fully-populated rows before modelling.
df = df.dropna()

# Default 75/25 train/test split of tweets (X) and their poz/neg labels (y).
# NOTE(review): no random_state, so the split differs on every run.
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    df["twitler"],
    df["status"])

# Encode the string labels as integers. The encoder is fitted on the training
# labels only and then *applied* to the test labels — the original called
# fit_transform on both, which could silently assign different integers if a
# class were missing from one split.
encoder = preprocessing.LabelEncoder()
train_y = encoder.fit_transform(train_y)
test_y = encoder.transform(test_y)

# Bag-of-words count vectors, vocabulary fitted on the training tweets.
vectorizer = CountVectorizer()
vectorizer.fit(train_x)

x_train_count = vectorizer.transform(train_x)
x_test_count = vectorizer.transform(test_x)


# Word-level TF-IDF features. The original transformed with the
# CountVectorizer (`vectorizer`) by mistake, so the "TF-IDF" matrices were
# actually raw counts — transform with the fitted TfidfVectorizer instead.
tf_idf_word_vectorizer = TfidfVectorizer()
tf_idf_word_vectorizer.fit(train_x)

x_train_tf_idf_word = tf_idf_word_vectorizer.transform(train_x)
x_test_tf_idf_word = tf_idf_word_vectorizer.transform(test_x)


# N-gram level TF-IDF: features are 2- and 3-word sequences.
# fit_transform on the training tweets is equivalent to fit() followed by
# transform() on the same data.
tf_idf_ngram_vectorizer = TfidfVectorizer(ngram_range=(2, 3))
x_train_tf_idf_ngram = tf_idf_ngram_vectorizer.fit_transform(train_x)
x_test_tf_idf_ngram = tf_idf_ngram_vectorizer.transform(test_x)

# Character level TF-IDF: features are 2- and 3-character sequences.
tf_idf_chars_vectorizer = TfidfVectorizer(analyzer="char", ngram_range=(2, 3))
x_train_tf_idf_chars = tf_idf_chars_vectorizer.fit_transform(train_x)
x_test_tf_idf_chars = tf_idf_chars_vectorizer.transform(test_x)





# Prediction: logistic regression on the count vectors.

from sklearn import linear_model
loj = linear_model.LogisticRegression()
loj_model = loj.fit(x_train_count, train_y)

# NOTE(review): cross-validating on the *test* matrix is unusual — normally
# you cross-validate on the training data, or score once on the held-out test
# set. Kept as-is to preserve the reported number. (Variable renamed from the
# misspelled `accuary`.)
accuracy = model_selection.cross_val_score(
    loj_model, x_test_count, test_y, cv=10).mean()

print("model doğruluk oranı %s dir" % accuracy)

# Classify a new sentence, reusing the CountVectorizer fitted above — the
# original fitted a brand-new identical vectorizer on the same training data,
# which was redundant.
yorum = pd.Series("bu akşam çok kötü")
yorum = vectorizer.transform(yorum)
loj_model.predict(yorum)
# 0 means negative, 1 means positive





























Bir yanıt yazın