1
+ {
2
+ "cells" : [
3
+ {
4
+ "cell_type" : " code" ,
5
+ "source" : [
6
+ " from pyspark.sql import *\n " ,
7
+ " from pyspark.sql.functions import *\n " ,
8
+ " from pyspark.sql.types import *\n " ,
9
+ " \n " ,
10
+ " def to_date_df(df, fmt, fld):\n " ,
11
+ " return df.withColumn(fld, to_date(col(fld), fmt))\n "
12
+ ],
13
+ "metadata" : {
14
+ "application/vnd.databricks.v1+cell" : {
15
+ "title" : " " ,
16
+ "showTitle" : false ,
17
+ "inputWidgets" : {},
18
+ "nuid" : " 80b42010-ef17-4d97-9ff1-16f7b96464d3"
19
+ }
20
+ },
21
+ "outputs" : [
22
+ {
23
+ "output_type" : " display_data" ,
24
+ "metadata" : {
25
+ "application/vnd.databricks.v1+output" : {
26
+ "datasetInfos" : [],
27
+ "data" : " <div class=\" ansiout\" ></div>" ,
28
+ "removedWidgets" : [],
29
+ "addedWidgets" : {},
30
+ "type" : " html" ,
31
+ "arguments" : {}
32
+ }
33
+ },
34
+ "data" : {
35
+ "text/html" : [
36
+ " <style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \" Source Code Pro\" , \" Menlo\" , monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n </style>\n <div class=\" ansiout\" ></div>"
37
+ ]
38
+ }
39
+ }
40
+ ],
41
+ "execution_count" : 0
42
+ },
43
+ {
44
+ "cell_type" : " code" ,
45
+ "source" : [
46
+ " my_schema = StructType([\n StructField(\" ID\" , StringType()),\n StructField(\" EventDate\" , StringType())])\n\n my_rows = [Row(\" 123\" , \" 04/05/2020\" ), Row(\" 124\" , \" 4/5/2020\" ), Row(\" 125\" , \" 04/5/2020\" ), Row(\" 126\" , \" 4/05/2020\" )]\n my_rdd = spark.sparkContext.parallelize(my_rows, 2)\n my_df = spark.createDataFrame(my_rdd, my_schema)"
47
+ ],
48
+ "metadata" : {
49
+ "application/vnd.databricks.v1+cell" : {
50
+ "title" : " " ,
51
+ "showTitle" : false ,
52
+ "inputWidgets" : {},
53
+ "nuid" : " 4ae5aa80-cd11-4e1a-905e-cf1406a6e1c8"
54
+ }
55
+ },
56
+ "outputs" : [
57
+ {
58
+ "output_type" : " display_data" ,
59
+ "metadata" : {
60
+ "application/vnd.databricks.v1+output" : {
61
+ "datasetInfos" : [
62
+ {
63
+ "name" : " my_df" ,
64
+ "typeStr" : " pyspark.sql.dataframe.DataFrame" ,
65
+ "schema" : {
66
+ "fields" : [
67
+ {
68
+ "metadata" : {},
69
+ "name" : " ID" ,
70
+ "nullable" : true ,
71
+ "type" : " string"
72
+ },
73
+ {
74
+ "metadata" : {},
75
+ "name" : " EventDate" ,
76
+ "nullable" : true ,
77
+ "type" : " string"
78
+ }
79
+ ],
80
+ "type" : " struct"
81
+ },
82
+ "tableIdentifier" : null
83
+ }
84
+ ],
85
+ "data" : " <div class=\" ansiout\" ></div>" ,
86
+ "removedWidgets" : [],
87
+ "addedWidgets" : {},
88
+ "type" : " html" ,
89
+ "arguments" : {}
90
+ }
91
+ },
92
+ "data" : {
93
+ "text/html" : [
94
+ " <style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \" Source Code Pro\" , \" Menlo\" , monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n </style>\n <div class=\" ansiout\" ></div>"
95
+ ]
96
+ }
97
+ }
98
+ ],
99
+ "execution_count" : 0
100
+ },
101
+ {
102
+ "cell_type" : " code" ,
103
+ "source" : [
104
+ " my_df.printSchema()\n my_df.show()\n new_df = to_date_df(my_df, \" M/d/y\" , \" EventDate\" )\n new_df.printSchema()\n new_df.show() "
105
+ ],
106
+ "metadata" : {
107
+ "application/vnd.databricks.v1+cell" : {
108
+ "title" : " " ,
109
+ "showTitle" : false ,
110
+ "inputWidgets" : {},
111
+ "nuid" : " 457f165a-d68e-476a-8b32-31ed691770b2"
112
+ }
113
+ },
114
+ "outputs" : [
115
+ {
116
+ "output_type" : " display_data" ,
117
+ "metadata" : {
118
+ "application/vnd.databricks.v1+output" : {
119
+ "datasetInfos" : [
120
+ {
121
+ "name" : " new_df" ,
122
+ "typeStr" : " pyspark.sql.dataframe.DataFrame" ,
123
+ "schema" : {
124
+ "fields" : [
125
+ {
126
+ "metadata" : {},
127
+ "name" : " ID" ,
128
+ "nullable" : true ,
129
+ "type" : " string"
130
+ },
131
+ {
132
+ "metadata" : {},
133
+ "name" : " EventDate" ,
134
+ "nullable" : true ,
135
+ "type" : " date"
136
+ }
137
+ ],
138
+ "type" : " struct"
139
+ },
140
+ "tableIdentifier" : null
141
+ }
142
+ ],
143
+ "data" : " <div class=\" ansiout\" >root\n |-- ID: string (nullable = true)\n |-- EventDate: string (nullable = true)\n\n +---+----------+\n | ID| EventDate|\n +---+----------+\n |123|04/05/2020|\n |124| 4/5/2020|\n |125| 04/5/2020|\n |126| 4/05/2020|\n +---+----------+\n\n root\n |-- ID: string (nullable = true)\n |-- EventDate: date (nullable = true)\n\n +---+----------+\n | ID| EventDate|\n +---+----------+\n |123|2020-04-05|\n |124|2020-04-05|\n |125|2020-04-05|\n |126|2020-04-05|\n +---+----------+\n\n </div>" ,
144
+ "removedWidgets" : [],
145
+ "addedWidgets" : {},
146
+ "type" : " html" ,
147
+ "arguments" : {}
148
+ }
149
+ },
150
+ "data" : {
151
+ "text/html" : [
152
+ " <style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \" Source Code Pro\" , \" Menlo\" , monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n </style>\n <div class=\" ansiout\" >root\n |-- ID: string (nullable = true)\n |-- EventDate: string (nullable = true)\n\n +---+----------+\n | ID| EventDate|\n +---+----------+\n |123|04/05/2020|\n |124| 4/5/2020|\n |125| 04/5/2020|\n |126| 4/05/2020|\n +---+----------+\n\n root\n |-- ID: string (nullable = true)\n |-- EventDate: date (nullable = true)\n\n +---+----------+\n | ID| EventDate|\n +---+----------+\n |123|2020-04-05|\n |124|2020-04-05|\n |125|2020-04-05|\n |126|2020-04-05|\n +---+----------+\n\n </div>"
153
+ ]
154
+ }
155
+ }
156
+ ],
157
+ "execution_count" : 0
158
+ }
159
+ ],
160
+ "metadata" : {
161
+ "application/vnd.databricks.v1+notebook" : {
162
+ "notebookName" : " MyPythonNotebook" ,
163
+ "dashboards" : [],
164
+ "language" : " python" ,
165
+ "widgets" : {},
166
+ "notebookOrigID" : 343023503281504
167
+ }
168
+ },
169
+ "nbformat" : 4 ,
170
+ "nbformat_minor" : 0
171
+ }
0 commit comments