Skip to content

Commit

Permalink
Update to arrow 37 (jorgecarleitao#1465)
Browse files Browse the repository at this point in the history
Co-authored-by: sundyli <[email protected]>
  • Loading branch information
tustvold and sundy-li authored Apr 13, 2023
1 parent 33f6ba1 commit e14c238
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 30 deletions.
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ odbc-api = { version = "0.36", optional = true }
ahash = "0.8"

# Support conversion to/from arrow-rs
arrow-buffer = { version = "36.0.0", optional = true }
arrow-schema = { version = "36.0.0", optional = true }
arrow-data = { version = "36.0.0", optional = true }
arrow-array = { version = "36.0.0", optional = true }
arrow-buffer = { version = "37.0.0", optional = true }
arrow-schema = { version = "37.0.0", optional = true }
arrow-data = { version = "37.0.0", optional = true }
arrow-array = { version = "37.0.0", optional = true }

[target.wasm32-unknown-unknown.dependencies]
getrandom = { version = "0.2", features = ["js"] }
Expand Down
21 changes: 21 additions & 0 deletions src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ impl From<Field> for arrow_schema::Field {
#[cfg(feature = "arrow")]
impl From<arrow_schema::Field> for Field {
    /// Converts an owned `arrow_schema::Field` by delegating to the
    /// by-reference `From<&arrow_schema::Field>` conversion.
    fn from(value: arrow_schema::Field) -> Self {
        let borrowed = &value;
        Self::from(borrowed)
    }
}

#[cfg(feature = "arrow")]
impl From<&arrow_schema::Field> for Field {
fn from(value: &arrow_schema::Field) -> Self {
let data_type = value.data_type().clone().into();
let metadata = value
.metadata()
Expand All @@ -73,3 +80,17 @@ impl From<arrow_schema::Field> for Field {
Self::new(value.name(), data_type, value.is_nullable()).with_metadata(metadata)
}
}

#[cfg(feature = "arrow")]
impl From<arrow_schema::FieldRef> for Field {
    /// Converts an `arrow_schema::FieldRef` by borrowing the field it refers
    /// to and delegating to the by-reference conversion.
    fn from(value: arrow_schema::FieldRef) -> Self {
        let inner: &arrow_schema::Field = value.as_ref();
        Self::from(inner)
    }
}

#[cfg(feature = "arrow")]
impl From<&arrow_schema::FieldRef> for Field {
    /// Converts a borrowed `arrow_schema::FieldRef` by borrowing the field it
    /// refers to and delegating to the by-reference conversion.
    fn from(value: &arrow_schema::FieldRef) -> Self {
        let inner: &arrow_schema::Field = value.as_ref();
        Self::from(inner)
    }
}
47 changes: 25 additions & 22 deletions src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ pub enum DataType {
#[cfg(feature = "arrow")]
impl From<DataType> for arrow_schema::DataType {
fn from(value: DataType) -> Self {
use arrow_schema::Field as ArrowField;
use arrow_schema::UnionFields;

match value {
DataType::Null => Self::Null,
DataType::Boolean => Self::Boolean,
Expand All @@ -177,7 +180,7 @@ impl From<DataType> for arrow_schema::DataType {
DataType::Float16 => Self::Float16,
DataType::Float32 => Self::Float32,
DataType::Float64 => Self::Float64,
DataType::Timestamp(unit, tz) => Self::Timestamp(unit.into(), tz),
DataType::Timestamp(unit, tz) => Self::Timestamp(unit.into(), tz.map(Into::into)),
DataType::Date32 => Self::Date32,
DataType::Date64 => Self::Date64,
DataType::Time32(unit) => Self::Time32(unit.into()),
Expand All @@ -189,23 +192,23 @@ impl From<DataType> for arrow_schema::DataType {
DataType::LargeBinary => Self::LargeBinary,
DataType::Utf8 => Self::Utf8,
DataType::LargeUtf8 => Self::LargeUtf8,
DataType::List(f) => Self::List(Box::new((*f).into())),
DataType::List(f) => Self::List(Arc::new((*f).into())),
DataType::FixedSizeList(f, size) => {
Self::FixedSizeList(Box::new((*f).into()), size as _)
Self::FixedSizeList(Arc::new((*f).into()), size as _)
}
DataType::LargeList(f) => Self::LargeList(Box::new((*f).into())),
DataType::Struct(f) => Self::Struct(f.into_iter().map(Into::into).collect()),
DataType::LargeList(f) => Self::LargeList(Arc::new((*f).into())),
DataType::Struct(f) => Self::Struct(f.into_iter().map(ArrowField::from).collect()),
DataType::Union(fields, Some(ids), mode) => {
let ids = ids.into_iter().map(|x| x as _).collect();
let fields = fields.into_iter().map(Into::into).collect();
Self::Union(fields, ids, mode.into())
let ids = ids.into_iter().map(|x| x as _);
let fields = fields.into_iter().map(ArrowField::from);
Self::Union(UnionFields::new(ids, fields), mode.into())
}
DataType::Union(fields, None, mode) => {
let ids = (0..fields.len() as i8).collect();
let fields = fields.into_iter().map(Into::into).collect();
Self::Union(fields, ids, mode.into())
let ids = 0..fields.len() as i8;
let fields = fields.into_iter().map(ArrowField::from);
Self::Union(UnionFields::new(ids, fields), mode.into())
}
DataType::Map(f, ordered) => Self::Map(Box::new((*f).into()), ordered),
DataType::Map(f, ordered) => Self::Map(Arc::new((*f).into()), ordered),
DataType::Dictionary(key, value, _) => Self::Dictionary(
Box::new(DataType::from(key).into()),
Box::new((*value).into()),
Expand Down Expand Up @@ -235,7 +238,9 @@ impl From<arrow_schema::DataType> for DataType {
DataType::Float16 => Self::Float16,
DataType::Float32 => Self::Float32,
DataType::Float64 => Self::Float64,
DataType::Timestamp(unit, tz) => Self::Timestamp(unit.into(), tz),
DataType::Timestamp(unit, tz) => {
Self::Timestamp(unit.into(), tz.map(|x| x.to_string()))
}
DataType::Date32 => Self::Date32,
DataType::Date64 => Self::Date64,
DataType::Time32(unit) => Self::Time32(unit.into()),
Expand All @@ -247,18 +252,16 @@ impl From<arrow_schema::DataType> for DataType {
DataType::LargeBinary => Self::LargeBinary,
DataType::Utf8 => Self::Utf8,
DataType::LargeUtf8 => Self::LargeUtf8,
DataType::List(f) => Self::List(Box::new((*f).into())),
DataType::FixedSizeList(f, size) => {
Self::FixedSizeList(Box::new((*f).into()), size as _)
}
DataType::LargeList(f) => Self::LargeList(Box::new((*f).into())),
DataType::List(f) => Self::List(Box::new(f.into())),
DataType::FixedSizeList(f, size) => Self::FixedSizeList(Box::new(f.into()), size as _),
DataType::LargeList(f) => Self::LargeList(Box::new(f.into())),
DataType::Struct(f) => Self::Struct(f.into_iter().map(Into::into).collect()),
DataType::Union(fields, ids, mode) => {
let ids = ids.into_iter().map(|x| x as _).collect();
let fields = fields.into_iter().map(Into::into).collect();
DataType::Union(fields, mode) => {
let ids = fields.iter().map(|(x, _)| x as _).collect();
let fields = fields.iter().map(|(_, f)| f.into()).collect();
Self::Union(fields, Some(ids), mode.into())
}
DataType::Map(f, ordered) => Self::Map(Box::new((*f).into()), ordered),
DataType::Map(f, ordered) => Self::Map(Box::new(f.into()), ordered),
DataType::Dictionary(key, value) => {
let key = match *key {
DataType::Int8 => IntegerType::Int8,
Expand Down
4 changes: 2 additions & 2 deletions src/io/json/read/deserialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,10 +535,10 @@ pub(crate) fn _deserialize<'a, A: Borrow<Value<'a>>>(
let iter = rows.iter().map(|row| match row.borrow() {
Value::Number(v) => Some(deserialize_int_single(*v)),
Value::String(v) => match (tu, tz) {
(_, None) => temporal_conversions::utf8_to_naive_timestamp_scalar(v, "%+", &tu),
(_, None) => temporal_conversions::utf8_to_naive_timestamp_scalar(v, "%+", tu),
(_, Some(ref tz)) => {
let tz = temporal_conversions::parse_offset(tz).unwrap();
temporal_conversions::utf8_to_timestamp_scalar(v, "%+", &tz, &tu)
temporal_conversions::utf8_to_timestamp_scalar(v, "%+", &tz, tu)
}
},
_ => None,
Expand Down
2 changes: 1 addition & 1 deletion tests/it/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fn test_arrow2_roundtrip(array: &dyn arrow_array::Array) {
assert_eq!(back.len(), array.len());

match array.data_type() {
d @ arrow_schema::DataType::Union(_, _, arrow_schema::UnionMode::Sparse) => {
d @ arrow_schema::DataType::Union(_, arrow_schema::UnionMode::Sparse) => {
// Temporary workaround https://github.com/apache/arrow-rs/issues/4044
let data = array.to_data();
let type_ids = data.buffers()[0].slice_with_length(data.offset(), data.len());
Expand Down
1 change: 0 additions & 1 deletion tests/it/temporal_conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ fn scalar_tz_aware_no_timezone() {

let tz = temporal_conversions::parse_offset("-02:00").unwrap();
let str = "2023-04-07T10:23:34.000000000-02:00";
let _nanos_expected = 1680870214000000000 as i64;

// seconds
let r = temporal_conversions::utf8_to_timestamp_scalar(str, fmt, &tz, &TimeUnit::Second);
Expand Down
1 change: 1 addition & 0 deletions tests/it/test_util.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/// Returns the relative path `testing/arrow-testing/data` as an owned `String`.
// NOTE(review): presumably the allowance exists because not every test
// target calls this helper — confirm before removing it.
#[allow(dead_code)]
pub fn arrow_test_data() -> String {
    String::from("testing/arrow-testing/data")
}

0 comments on commit e14c238

Please sign in to comment.