import tensorflow as tf
from pyspark.sql import SparkSession
from time import time
# Exploration script: load a MySQL table through JDBC, inspect and plot it,
# round-trip it through CSV, then persist a filtered subset as Hive tables.
# Every step is written so the script can be re-run without failing on
# objects left behind by a previous run.

# Local Spark session with Hive support enabled (needed for saveAsTable and
# the database/table DDL issued further down).
spark = SparkSession.builder.master('local').appName("csds_test").enableHiveSupport().getOrCreate()
spark

# NOTE(review): credentials are hard-coded in source; move them to
# environment variables or a secrets store before sharing this script.
jdbc_url = "jdbc:mysql://localhost:3306"
jdbc_properties = {'user':'chandanshastri','password':'superman'}

# Source data: table aws_log in the rrsc_aws MySQL schema.
isro = spark.read.jdbc(jdbc_url,"rrsc_aws.aws_log",properties=jdbc_properties)
isro.show(10,truncate=False)
# Print the count: as a bare expression the result is discarded when this
# runs as a script, so the job would be executed for nothing.
print(isro.count())
isro.printSchema()

# Keep only rows whose RAINt exceeds 1, projected to the four columns of interest.
test = isro.filter((isro.RAINt>1)).select('RTIME','AirTemp','RelHum','RAINt')
test.show(10,truncate=False)

# Time the average-AirTemp aggregation (including rendering of the result).
t=time()
isro.agg({"AirTemp":"avg"}).show()
# time() differences are in seconds; scale to match the "ms" label.
print("Total time taken : " + str((time()-t)*1000) + " ms")

import matplotlib.pyplot as plt
# Collect both columns in ONE job so each x value is guaranteed to be paired
# with the y value from the same row, then unwrap the Row objects into plain
# values for matplotlib.
plot_rows = test.select('RTIME','AirTemp').collect()
rtime = [row['RTIME'] for row in plot_rows]
temp = [row['AirTemp'] for row in plot_rows]
plt.plot(rtime,temp)

# CSV round trip. Overwrite so an existing 'isro.csv' directory from an
# earlier run does not abort the write.
isro.write.mode("overwrite").csv('isro.csv')
df3 = spark.read.csv('isro.csv',inferSchema=True)
print(df3.count())
df3.printSchema()

# Persist the filtered subset into a Hive database; IF NOT EXISTS keeps the
# statement from failing once the database has been created.
spark.sql("create database if not exists chandan")
# Alternative sink: write the subset back to MySQL instead of Hive.
# test.write.jdbc(jdbc_url,"rrsc_aws.aws_test",properties=jdbc_properties)
test.write.mode("append").saveAsTable("chandan.test")
spark.sql("use chandan").show()
spark.sql("show tables").show()
spark.sql("select * from test").show()

# Expose the CSV-backed frame to SQL; replace any view of the same name that
# is still registered in this session.
df3.createOrReplaceTempView("test4view")
spark.sql("show tables").show()

# Clone the schema of chandan.test into a table stored at an explicit
# location, then copy the rows across.
spark.sql("CREATE TABLE IF NOT EXISTS chandan.test4 LIKE chandan.test location 'F:/HDFS/'")
spark.sql("show tables").show()
spark.sql("insert into chandan.test4 select * from test")
spark.sql("select * from chandan.test4").show()