DataFrame API
Init Session
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()
Create DataFrame
from datetime import datetime, date
import pandas as pd
from pyspark.sql import Row
# Build a three-row DataFrame from Row objects. No schema is passed, so the
# column names come from the Row fields and the types are inferred by Spark.
df = spark.createDataFrame(
    [
        Row(a=16, b=2.0, c="jane", d=date(2010, 1, 1), e=datetime(2021, 1, 1, 12, 0)),
        Row(a=16, b=3.0, c="john", d=date(2010, 2, 1), e=datetime(2021, 1, 2, 12, 0)),
        Row(a=32, b=5.0, c="alex", d=date(2010, 3, 1), e=datetime(2022, 1, 3, 12, 0)),
    ]
)
df
Viewing Data
Last updated