pyspark.sql.Column.isNotNull
isNotNull(): True if the current expression is NOT null.
isNull(): True if the current expression is null.
With these methods you can filter rows on a column's null status and get the total count of null or non-null values in a column using PySpark.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("isNotNull @ Freshers.in ").getOrCreate()

emp_schema = ["name", "id", "age", "gender", "dept", "state", "salary", "increment"]
emp_data = [
    ("Sam", 1010, 30, "Male", "IT", None, 125000, 8.1),
    ("Peter", 1020, 32, "Male", "HR", "NY", 90000, 7.0),
    ("Tom", 1030, 34, "Male", "IT", "CT", 85000, 5.0),
    ("Joe", 1040, 55, "Male", "HR", None, 60000, 4.0),
    ("Barry", 1050, 34, "Male", "IT", "OR", 95000, 6.0)]
df = spark.createDataFrame(data=emp_data, schema=emp_schema)
df.show()
+-----+----+---+------+----+-----+------+---------+
| name|  id|age|gender|dept|state|salary|increment|
+-----+----+---+------+----+-----+------+---------+
|  Sam|1010| 30|  Male|  IT| null|125000|      8.1|
|Peter|1020| 32|  Male|  HR|   NY| 90000|      7.0|
|  Tom|1030| 34|  Male|  IT|   CT| 85000|      5.0|
|  Joe|1040| 55|  Male|  HR| null| 60000|      4.0|
|Barry|1050| 34|  Male|  IT|   OR| 95000|      6.0|
+-----+----+---+------+----+-----+------+---------+
df.filter(df.state.isNotNull()).show()
+-----+----+---+------+----+-----+------+---------+
| name|  id|age|gender|dept|state|salary|increment|
+-----+----+---+------+----+-----+------+---------+
|Peter|1020| 32|  Male|  HR|   NY| 90000|      7.0|
|  Tom|1030| 34|  Male|  IT|   CT| 85000|      5.0|
|Barry|1050| 34|  Male|  IT|   OR| 95000|      6.0|
+-----+----+---+------+----+-----+------+---------+
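The same filter can also be written with the col() helper from pyspark.sql.functions or with a SQL-style condition string. A minimal sketch, assuming the df created above:

from pyspark.sql.functions import col

# Equivalent ways to keep only rows whose state is not null
df.filter(col("state").isNotNull()).show()   # column object via col()
df.filter("state IS NOT NULL").show()        # SQL-style condition string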
df.filter(df.state.isNull()).show()
+----+----+---+------+----+-----+------+---------+
|name|  id|age|gender|dept|state|salary|increment|
+----+----+---+------+----+-----+------+---------+
| Sam|1010| 30|  Male|  IT| null|125000|      8.1|
| Joe|1040| 55|  Male|  HR| null| 60000|      4.0|
+----+----+---+------+----+-----+------+---------+

df.filter(df.state.isNull()).count()
2
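Building on the count() call above, a common follow-up is to count the nulls in every column at once by combining isNull() with when() and count(). A minimal sketch, assuming the same df; the variable name null_counts is just illustrative:

from pyspark.sql.functions import col, count, when

# count() ignores nulls, so counting the when() expression
# gives the number of rows where each column is null
null_counts = df.select(
    [count(when(col(c).isNull(), c)).alias(c) for c in df.columns]
)
null_counts.show()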