
[doc] typos and clarity
rdettai committed Oct 12, 2021
1 parent d69fc9a commit 2e4452a
Showing 10 changed files with 20 additions and 23 deletions.
2 changes: 1 addition & 1 deletion ballista/rust/client/src/context.rs
@@ -239,7 +239,7 @@ impl BallistaContext {
/// Create a DataFrame from a SQL statement.
///
/// This method is `async` because queries of type `CREATE EXTERNAL TABLE`
-/// might require the schema to be infered.
+/// might require the schema to be inferred.
pub async fn sql(&self, sql: &str) -> Result<Arc<dyn DataFrame>> {
let mut ctx = {
let state = self.state.lock().unwrap();
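For context, a minimal sketch of the API this doc comment describes, assuming the Ballista client interface of this era (the scheduler address and file path are hypothetical). The `await` matters because a `CREATE EXTERNAL TABLE` statement may have to open the file and infer its schema before the query can be planned:

use ballista::prelude::*;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = BallistaConfig::builder().build()?;
    // Connect to a (hypothetical) Ballista scheduler.
    let ctx = BallistaContext::remote("localhost", 50050, &config);
    // Schema inference for the CSV file happens inside this call,
    // which is why `sql` is async.
    ctx.sql("CREATE EXTERNAL TABLE users STORED AS CSV LOCATION 'users.csv'")
        .await?;
    Ok(())
}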
4 changes: 2 additions & 2 deletions ballista/rust/core/src/serde/physical_plan/to_proto.rs
@@ -261,7 +261,7 @@ impl TryInto<protobuf::PhysicalPlanNode> for Arc<dyn ExecutionPlan> {
.as_ref()
.ok_or_else(|| {
BallistaError::General(
"projection in CsvExec dosn not exist.".to_owned(),
"projection in CsvExec does not exist.".to_owned(),
)
})?
.iter()
@@ -320,7 +320,7 @@ impl TryInto<protobuf::PhysicalPlanNode> for Arc<dyn ExecutionPlan> {
.as_ref()
.ok_or_else(|| {
BallistaError::General(
"projection in AvroExec dosn not exist.".to_owned(),
"projection in AvroExec does not exist.".to_owned(),
)
})?
.iter()
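The two hunks above fix the same message in two places. As a standalone sketch of the pattern (a hypothetical helper, not the actual Ballista code): an optional projection is surfaced as an error instead of being unwrapped.

/// Serialize an optional projection, turning a missing one into an
/// error rather than a panic.
fn projection_to_proto(projection: &Option<Vec<usize>>) -> Result<Vec<u32>, String> {
    let indices = projection
        .as_ref()
        .ok_or_else(|| "projection in CsvExec does not exist.".to_owned())?;
    Ok(indices.iter().map(|i| *i as u32).collect())
}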
7 changes: 3 additions & 4 deletions datafusion/src/datasource/file_format/mod.rs
@@ -27,7 +27,6 @@ use std::fmt;
use std::sync::Arc;

use crate::arrow::datatypes::SchemaRef;
-use crate::datasource::{create_max_min_accs, get_col_stats};
use crate::error::Result;
use crate::logical_plan::Expr;
use crate::physical_plan::{ExecutionPlan, Statistics};
@@ -44,15 +43,15 @@ pub struct PhysicalPlanConfig {
pub object_store: Arc<dyn ObjectStore>,
/// Schema before projection
pub schema: SchemaRef,
-/// Partitioned fields to process in the executor
+/// List of files to be processed, grouped into partitions
pub files: Vec<Vec<PartitionedFile>>,
-/// Estimated overall statistics of source plan
+/// Estimated overall statistics of the plan, taking `filters` into account
pub statistics: Statistics,
/// Columns on which to project the data
pub projection: Option<Vec<usize>>,
/// The maximum number of records per arrow column
pub batch_size: usize,
-/// The filters that where pushed down to this execution plan
+/// The filters that were pushed down to this execution plan
pub filters: Vec<Expr>,
/// The minimum number of records required from this source plan
pub limit: Option<usize>,
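To illustrate the reworded `files` doc comment, a plain-strings sketch (not the real `PartitionedFile` type, and the paths are hypothetical) of how files map to partitions:

fn main() {
    // Outer Vec: one entry per output partition; inner Vec: the files
    // that a single task reads back to back.
    let files: Vec<Vec<&str>> = vec![
        vec!["part-0.parquet", "part-1.parquet"], // partition 0
        vec!["part-2.parquet"],                   // partition 1
    ];
    assert_eq!(files.len(), 2); // number of output partitions
}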
2 changes: 1 addition & 1 deletion datafusion/src/datasource/file_format/parquet.rs
@@ -36,9 +36,9 @@ use parquet::file::statistics::Statistics as ParquetStatistics;

use super::FileFormat;
use super::PhysicalPlanConfig;
-use super::{create_max_min_accs, get_col_stats};
use crate::arrow::datatypes::{DataType, Field};
use crate::datasource::object_store::{ObjectReader, ObjectReaderStream};
+use crate::datasource::{create_max_min_accs, get_col_stats};
use crate::error::DataFusionError;
use crate::error::Result;
use crate::logical_plan::combine_filters;
14 changes: 7 additions & 7 deletions datafusion/src/datasource/listing.rs
@@ -46,7 +46,7 @@ pub struct ListingOptions {
pub file_extension: String,
/// The file format
pub format: Arc<dyn FileFormat>,
-/// The expected partition column names.
+/// The expected partition column names in the folder structure.
/// For example `Vec["a", "b"]` means that the two first levels of
/// partitioning expected should be named "a" and "b":
/// - If there is a third level of partitioning it will be ignored.
@@ -55,11 +55,11 @@
/// TODO implement case where partitions.len() > 0
pub partitions: Vec<String>,
/// Set true to try to guess statistics from the files.
-/// This can add a lot of overhead as it requires files to
-/// be opened and partially parsed.
+/// This can add a lot of overhead as it will usually require files
+/// to be opened and at least partially parsed.
pub collect_stat: bool,
-/// Group files to avoid that the number of partitions
-/// exceeds this limit
+/// Group files to avoid that the number of partitions exceeds
+/// this limit
pub target_partitions: usize,
}

@@ -80,8 +80,8 @@ impl ListingOptions {
}
}

-/// Infer the schema of the files at the given uri, including the partitioning
-/// columns.
+/// Infer the schema of the files at the given path on the provided object store.
+/// The inferred schema should include the partitioning columns.
///
/// This method will not be called by the table itself but before creating it.
/// This way when creating the logical plan we can decide to resolve the schema
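A sketch of how these options fit together, assuming `ParquetFormat::default()` exists and the module paths match this commit's file layout. It describes a hypothetical folder layout `/tables/events/a=<v>/b=<v>/*.parquet` whose first two levels are the expected partition columns:

use std::sync::Arc;

use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::ListingOptions;

fn listing_options() -> ListingOptions {
    ListingOptions {
        file_extension: ".parquet".to_owned(),
        format: Arc::new(ParquetFormat::default()),
        // First two folder levels are partition columns "a" and "b";
        // a third level would be ignored, as documented above.
        partitions: vec!["a".to_owned(), "b".to_owned()],
        // Opens files to gather statistics, at the cost described above.
        collect_stat: true,
        // Group files so that at most 8 partitions are produced.
        target_partitions: 8,
    }
}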
3 changes: 0 additions & 3 deletions datafusion/src/datasource/mod.rs
@@ -40,7 +40,6 @@ use std::pin::Pin;
/// if the optional `limit` is provided, includes only sufficient files
/// needed to read up to `limit` number of rows
/// TODO fix case where `num_rows` and `total_byte_size` are not defined (stat should be None instead of Some(0))
-/// TODO move back to crate::datasource::mod.rs once legacy cleaned up
pub async fn get_statistics_with_limit(
all_files: impl Stream<Item = Result<(PartitionedFile, Statistics)>>,
schema: SchemaRef,
@@ -126,7 +125,6 @@
#[derive(Debug, Clone)]
/// A single file that should be read, along with its schema, statistics
/// and partition column values that need to be appended to each row.
-/// TODO move back to crate::datasource::mod.rs once legacy cleaned up
pub struct PartitionedFile {
/// Path for the file (e.g. URL, filesystem path, etc)
pub file_meta: FileMeta,
@@ -159,7 +157,6 @@ impl std::fmt::Display for PartitionedFile {

#[derive(Debug, Clone)]
/// A collection of files that should be read in a single task
-/// TODO move back to crate::datasource::mod.rs once legacy cleaned up
pub struct FilePartition {
/// The index of the partition among all partitions
pub index: usize,
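The `limit` behaviour of `get_statistics_with_limit` described above, as a standalone numeric sketch (plain row counts, not the real stream and statistics types):

/// Number of files needed so that their cumulative row count reaches `limit`.
fn files_needed(row_counts: &[usize], limit: usize) -> usize {
    let mut rows = 0;
    for (i, n) in row_counts.iter().enumerate() {
        rows += n;
        if rows >= limit {
            return i + 1; // enough rows collected at this file
        }
    }
    row_counts.len() // limit exceeds the table: every file is needed
}

fn main() {
    // Three files of 100 rows each; a limit of 150 only needs the first two.
    assert_eq!(files_needed(&[100, 100, 100], 150), 2);
}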
5 changes: 3 additions & 2 deletions datafusion/src/datasource/object_store/local.rs
@@ -152,12 +152,13 @@ async fn list_all(prefix: String) -> Result<FileMetaStream> {
}
}

-/// Create a stream of `ObjectReader` by opening each file in the `files` vector
+/// Create a stream of `ObjectReader` by converting each file in the `files` vector
+/// into instances of `LocalFileReader`
pub fn local_object_reader_stream(files: Vec<String>) -> ObjectReaderStream {
Box::pin(futures::stream::iter(files).map(|f| Ok(local_object_reader(f))))
}

-/// Helper method to convert a file location to an ObjectReader
+/// Helper method to convert a file location to a `LocalFileReader`
pub fn local_object_reader(file: String) -> Arc<dyn ObjectReader> {
LocalFileSystem
.file_reader(local_file_meta(file).sized_file)
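A usage sketch, assuming this module path; the file paths are hypothetical:

use datafusion::datasource::object_store::local::local_object_reader_stream;

fn main() {
    // Each path is converted into a `LocalFileReader` and yielded from
    // the stream as an `Arc<dyn ObjectReader>`.
    let _readers = local_object_reader_stream(vec![
        "/tmp/data/part-0.csv".to_owned(),
        "/tmp/data/part-1.csv".to_owned(),
    ]);
}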
2 changes: 1 addition & 1 deletion datafusion/src/datasource/object_store/mod.rs
@@ -73,7 +73,7 @@ pub enum ListEntry {
#[derive(Debug, Clone)]
pub struct SizedFile {
/// Path of the file. It is relative to the current object
-/// store (it does not specify the xx:// scheme).
+/// store (it does not specify the `xx://` scheme).
pub path: String,
/// File size in total
pub size: u64,
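An illustration of the clarified doc comment, assuming this module path; the path is hypothetical and deliberately scheme-less:

use datafusion::datasource::object_store::SizedFile;

fn main() {
    let file = SizedFile {
        // Relative to the object store that resolves it; a store
        // registered for `s3://` would supply that scheme itself.
        path: "my-bucket/events/part-0.parquet".to_owned(),
        size: 4096, // total file size in bytes
    };
    assert!(!file.path.contains("://"));
}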
2 changes: 1 addition & 1 deletion datafusion/src/execution/context.rs
@@ -183,7 +183,7 @@ impl ExecutionContext {
/// Creates a dataframe that will execute a SQL query.
///
/// This method is `async` because queries of type `CREATE EXTERNAL TABLE`
-/// might require the schema to be infered.
+/// might require the schema to be inferred.
pub async fn sql(&mut self, sql: &str) -> Result<Arc<dyn DataFrame>> {
let plan = self.create_logical_plan(sql)?;
match plan {
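A minimal sketch of the behaviour this doc comment explains, with a hypothetical CSV path; the first `await` may perform I/O because the file has to be opened to infer its schema:

use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let mut ctx = ExecutionContext::new();
    // Schema inference for `users.csv` happens inside this call.
    ctx.sql("CREATE EXTERNAL TABLE users STORED AS CSV LOCATION 'users.csv'")
        .await?;
    let df = ctx.sql("SELECT count(*) FROM users").await?;
    let batches = df.collect().await?;
    println!("{:?}", batches);
    Ok(())
}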
2 changes: 1 addition & 1 deletion datafusion/src/physical_plan/expressions/binary.rs
@@ -484,7 +484,7 @@ pub fn binary_operator_data_type(
rhs_type: &DataType,
) -> Result<DataType> {
// validate that it is possible to perform the operation on incoming types.
-// (or the return datatype cannot be infered)
+// (or the return datatype cannot be inferred)
let common_type = common_binary_type(lhs_type, op, rhs_type)?;

match op {
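A sketch of what the function computes, assuming `binary_operator_data_type` and `Operator` are publicly exported at these paths: arithmetic returns the coerced common type, while comparisons return Boolean.

use datafusion::arrow::datatypes::DataType;
use datafusion::error::Result;
use datafusion::logical_plan::Operator;
use datafusion::physical_plan::expressions::binary_operator_data_type;

fn main() -> Result<()> {
    // Int32 + Float64 coerces to the common type Float64.
    let sum_ty =
        binary_operator_data_type(&DataType::Int32, &Operator::Plus, &DataType::Float64)?;
    assert_eq!(sum_ty, DataType::Float64);
    // Comparisons coerce their inputs but always return Boolean.
    let cmp_ty =
        binary_operator_data_type(&DataType::Int32, &Operator::Lt, &DataType::Int64)?;
    assert_eq!(cmp_ty, DataType::Boolean);
    Ok(())
}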
