
Commit

DOC: expand docs on sql type conversion
jorisvandenbossche committed Dec 8, 2014
1 parent 7d13fdd commit 56b84a0
Showing 4 changed files with 48 additions and 22 deletions.
37 changes: 25 additions & 12 deletions doc/source/io.rst
@@ -3393,12 +3393,34 @@ the database using :func:`~pandas.DataFrame.to_sql`.
data.to_sql('data', engine)
With some databases, writing large DataFrames can result in errors due to
packet size limitations being exceeded. This can be avoided by setting the
``chunksize`` parameter when calling ``to_sql``. For example, the following
writes ``data`` to the database in batches of 1000 rows at a time:

.. ipython:: python

   data.to_sql('data_chunked', engine, chunksize=1000)

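A minimal end-to-end sketch of the chunked write described above. The table name ``data_chunked`` comes from the snippet; the in-memory SQLite engine and the example frame are assumptions for illustration:

```python
import pandas as pd
from sqlalchemy import create_engine

# In-memory SQLite engine, assumed purely for demonstration.
engine = create_engine('sqlite://')
data = pd.DataFrame({'Col_1': range(2500), 'Col_2': ['a'] * 2500})

# Rows are written in batches of 1000 (here: 1000, 1000, 500),
# which keeps each INSERT below driver packet-size limits.
data.to_sql('data_chunked', engine, chunksize=1000, index=False)

n = int(pd.read_sql('SELECT COUNT(*) AS n FROM data_chunked', engine)['n'][0])
print(n)  # 2500
```

The chunk size only affects how the rows are batched on the wire; the resulting table is identical to a single-shot write.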
SQL data types
""""""""""""""

:func:`~pandas.DataFrame.to_sql` will try to map your data to an appropriate
SQL data type based on the dtype of the data. When you have columns of dtype
``object``, pandas will try to infer the data type.

You can always override the default type by specifying the desired SQL type of
any of the columns by using the ``dtype`` argument. This argument needs a
dictionary mapping column names to SQLAlchemy types (or strings for the sqlite3
fallback mode).
For example, to use the sqlalchemy ``String`` type instead of the
default ``Text`` type for string columns:

.. ipython:: python

   from sqlalchemy.types import String
   data.to_sql('data_dtype', engine, dtype={'Col_1': String})

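A self-contained sketch of the ``dtype`` override, assuming an in-memory SQLite engine and an illustrative two-column frame (only ``String`` and the table name ``data_dtype`` come from the snippet above):

```python
import pandas as pd
from sqlalchemy import create_engine, inspect
from sqlalchemy.types import String

engine = create_engine('sqlite://')
data = pd.DataFrame({'Col_1': ['a', 'b'], 'Col_2': [1.0, 2.0]})

# Force Col_1 to the SQLAlchemy String type instead of the inferred default.
data.to_sql('data_dtype', engine, dtype={'Col_1': String}, index=False)

# Inspect the created schema: Col_1 is typically VARCHAR rather than TEXT.
cols = {c['name']: str(c['type'])
        for c in inspect(engine).get_columns('data_dtype')}
print(cols['Col_1'])
```

Inspecting the table afterwards is a quick way to confirm that the override actually reached the database schema.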
.. note::

Due to the limited support for timedeltas in the different database
@@ -3413,15 +3435,6 @@ With some databases, writing large DataFrames can result in errors due to packet
Because of this, reading the database table back in does **not** generate
a categorical.

.. note::

You can specify the SQL type of any of the columns by using the dtypes
parameter (a dictionary mapping column names to SQLAlchemy types). This
can be useful in cases where columns with NULL values are inferred by
Pandas to an excessively general datatype (e.g. a boolean column is
inferred to be object because it has NULLs).
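The NULL-inference problem the note describes can be sketched as follows, assuming an in-memory SQLite engine (the column and table names are illustrative, not from the source):

```python
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.types import Boolean

engine = create_engine('sqlite://')

# A boolean column containing a missing value is held as dtype 'object',
# since numpy bool arrays cannot represent NULL.
df = pd.DataFrame({'flag': [True, False, None]})
print(df['flag'].dtype)  # object

# The dtype argument pins the column to an explicit SQL type regardless
# of the overly general inferred dtype.
df.to_sql('flags', engine, dtype={'flag': Boolean}, index=False)
```

Without the override, the object-dtype column would fall back to a generic text affinity in most backends.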


Reading Tables
~~~~~~~~~~~~~~

@@ -3782,11 +3795,11 @@ is lost when exporting.

*Stata* only supports string value labels, and so ``str`` is called on the
categories when exporting data. Exporting ``Categorical`` variables with
non-string categories produces a warning, and can result in a loss of
information if the ``str`` representations of the categories are not unique.

Labeled data can similarly be imported from *Stata* data files as ``Categorical``
variables using the keyword argument ``convert_categoricals`` (``True`` by default).
The keyword argument ``order_categoricals`` (``True`` by default) determines
whether imported ``Categorical`` variables are ordered.

11 changes: 10 additions & 1 deletion doc/source/whatsnew/v0.15.2.txt
@@ -96,7 +96,16 @@ API changes
Enhancements
~~~~~~~~~~~~

- Added the ability to specify the SQL type of columns when writing a DataFrame
to a database (:issue:`8778`).
For example, to use the sqlalchemy ``String`` type instead of the
default ``Text`` type for string columns:

.. code-block:: python

   from sqlalchemy.types import String
   data.to_sql('data_dtype', engine, dtype={'Col_1': String})

- Added ability to export Categorical data to Stata (:issue:`8633`). See :ref:`here <io.stata-categorical>` for limitations of categorical variables exported to Stata data files.
- Added ability to export Categorical data to/from HDF5 (:issue:`7621`). Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. See :ref:`here <io.hdf5-categorical>` for an example and caveats w.r.t. prior versions of pandas.
- Added support for ``searchsorted()`` on `Categorical` class (:issue:`8420`).
7 changes: 4 additions & 3 deletions pandas/core/generic.py
@@ -954,8 +954,9 @@ def to_sql(self, name, con, flavor='sqlite', schema=None, if_exists='fail',
chunksize : int, default None
If not None, then rows will be written in batches of this size at a
time. If None, all rows will be written at once.
dtype : Dictionary of column name to SQLAlchemy type, default None
Optional datatypes for SQL columns.
dtype : dict of column name to SQL type, default None
Optionally specify the datatype for columns. The SQL type should
be a SQLAlchemy type, or a string for the sqlite3 fallback connection.
"""
from pandas.io import sql
@@ -4128,7 +4129,7 @@ def func(self, axis=None, dtype=None, out=None, skipna=True,

y = _values_from_object(self).copy()

if skipna and issubclass(y.dtype.type,
(np.datetime64, np.timedelta64)):
result = accum_func(y, axis)
mask = isnull(self)
15 changes: 9 additions & 6 deletions pandas/io/sql.py
@@ -518,8 +518,9 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
chunksize : int, default None
If not None, then rows will be written in batches of this size at a
time. If None, all rows will be written at once.
dtype : dictionary of column name to SQLAlchemy type, default None
optional datatypes for SQL columns.
dtype : dict of column name to SQL type, default None
Optionally specify the datatype for columns. The SQL type should
be a SQLAlchemy type, or a string for the sqlite3 fallback connection.
"""
if if_exists not in ('fail', 'replace', 'append'):
@@ -1133,8 +1134,9 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
chunksize : int, default None
If not None, then rows will be written in batches of this size at a
time. If None, all rows will be written at once.
dtype : dictionary of column name to SQLAlchemy type, default None
Optional datatypes for SQL columns.
dtype : dict of column name to SQL type, default None
Optionally specify the datatype for columns. The SQL type should
be a SQLAlchemy type.
"""
if dtype is not None:
@@ -1468,8 +1470,9 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
chunksize : int, default None
If not None, then rows will be written in batches of this
size at a time. If None, all rows will be written at once.
dtype : dictionary of column_name to SQLite string type, default None
optional datatypes for SQL columns.
dtype : dict of column name to SQL type, default None
Optionally specify the datatype for columns. The SQL type should
be a string.
"""
if dtype is not None:
