-
-
Notifications
You must be signed in to change notification settings - Fork 423
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
34 changed files
with
1,204 additions
and
162 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,3 +13,4 @@ packages | |
dist/windows/ | ||
_benchmark_data* | ||
*.benchmark-results | ||
generated-site/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,136 @@ | ||
language: python | ||
python: | ||
- "2.7" | ||
- "3.6" | ||
matrix: | ||
sudo: false | ||
|
||
stages: | ||
- integration | ||
- release | ||
|
||
env: | ||
global: | ||
- CACHE_NAME=${TRAVIS_JOB_NAME} | ||
|
||
|
||
_commands_provider: | ||
|
||
_test: &_test make test | ||
|
||
_lint: &_lint make lint | ||
|
||
_release: &_release make local-release | ||
|
||
_install_requirements: &_install_requirements make dep | ||
|
||
# https://ttcshelbyville.wordpress.com/2012/12/19/disable-remote-differential-compression-form-the-command-line/ | ||
_disable_windows_compression: &_disable_windows_compression "powershell Disable-WindowsOptionalFeature -Online -FeatureName MSRDC-Infrastructure" | ||
|
||
# https://travis-ci.community/t/yarn-network-troubles/333/7 | ||
_disable_windows_defender: &_disable_windows_defender "powershell Set-MpPreference -DisableRealtimeMonitoring \\$true" | ||
|
||
|
||
_steps_provider: | ||
|
||
_test: &_step_test | ||
|
||
install: | ||
- *_install_requirements | ||
before_script: *_lint | ||
script: *_test | ||
|
||
_release: &_step_release | ||
|
||
install: *_install_requirements | ||
script: *_release | ||
|
||
|
||
|
||
jobs: | ||
include: | ||
- python: "3.7" | ||
dist: xenial # Needed for Python 3.7
install: pip install -r requirements.txt | ||
before_script: flake8 ./bin/q ./test/test-suite --count --select=E901,E999,F821,F822,F823 --show-source --statistics | ||
script: PYTHONIOENCODING=UTF-8 test/test-all | ||
- stage: integration | ||
name: py27-macos | ||
os: osx | ||
language: generic | ||
osx_image: xcode7.3 | ||
env: | ||
- PYENV_VERSION=2.7.14 | ||
before_install: source setup-pyenv.sh | ||
<<: *_step_test | ||
cache: | ||
directories: | ||
- ${HOME}/.pyenv_cache | ||
|
||
- stage: integration | ||
name: py36-macos | ||
os: osx | ||
language: generic | ||
osx_image: xcode7.3 | ||
env: | ||
- PYENV_VERSION=3.6.4 | ||
before_install: source setup-pyenv.sh | ||
<<: *_step_test | ||
cache: | ||
directories: | ||
- ${HOME}/.pyenv_cache | ||
|
||
- stage: integration | ||
name: py37-macos | ||
os: osx | ||
language: generic | ||
osx_image: xcode7.3 | ||
env: | ||
- PYENV_VERSION=3.7.3 | ||
before_install: source setup-pyenv.sh | ||
<<: *_step_test | ||
cache: | ||
directories: | ||
- ${HOME}/.pyenv_cache | ||
|
||
- stage: integration | ||
name: py27-linux | ||
language: python | ||
python: "2.7" | ||
<<: *_step_test | ||
|
||
- stage: integration | ||
name: py36-linux | ||
language: python | ||
python: "3.6" | ||
<<: *_step_test | ||
|
||
- stage: integration | ||
name: py37-linux | ||
language: python | ||
dist: xenial | ||
python: "3.7" | ||
<<: *_step_test | ||
|
||
- stage: release | ||
name: macos | ||
os: osx | ||
language: generic | ||
osx_image: xcode7.3 | ||
env: | ||
- PYENV_VERSION=3.7.3 | ||
before_install: source setup-pyenv.sh | ||
<<: *_step_release | ||
cache: | ||
directories: | ||
- ${HOME}/.pyenv_cache | ||
|
||
- stage: release | ||
name: linux | ||
language: python | ||
dist: xenial | ||
python: "3.7" | ||
<<: *_step_release | ||
|
||
- stage: release | ||
name: windows | ||
os: windows | ||
language: shell | ||
env: | ||
- PATH=/c/Python37:/c/Python37/Scripts:$PATH | ||
before_install: | ||
- *_disable_windows_compression | ||
- *_disable_windows_defender | ||
- choco install make | ||
- choco install python --version 3.7.3 | ||
<<: *_step_release |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
SHELL := /bin/bash | ||
|
||
PROJECT_NAME=$(shell dirname "$0") | ||
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) | ||
|
||
.PHONY: test help | ||
.DEFAULT_GOAL := ci | ||
|
||
ci: lint test ## Equivalent to 'make lint test' | ||
|
||
help: ## Show this help message. | ||
|
||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' | ||
|
||
dep: ## Install the dependent libraries. | ||
|
||
pip install -r test-requirements.txt | ||
pip install -e . | ||
|
||
lint: dep ## Run lint validations. | ||
|
||
flake8 q/ --count --select=E901,E999,F821,F822,F823 --show-source --statistics | ||
|
||
test: dep ## Run the unit tests. | ||
|
||
test/test-all | ||
## TODO Bring back pytest | ||
## py.test -rs -c pytest.ini -s -v q/tests/suite.py --rootdir . | ||
|
||
release: ## Run release | ||
pip install py-ci | ||
pyci release --no-wheel-publish --wheel-universal | ||
|
||
local-release: | ||
pip install py-ci | ||
./do-manual-release.sh | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,7 @@ q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). It c | |
## Installation. | ||
Extremely simple. | ||
|
||
Instructions for all OSs are [here](http://harelba.github.io/q/install.html). | ||
Instructions for all OSs are [here](http://harelba.github.io/q/#installation). | ||
|
||
## Examples | ||
|
||
|
@@ -20,18 +20,19 @@ q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" | |
ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" | ||
``` | ||
|
||
Go [here](http://harelba.github.io/q/examples.html) for more examples. | ||
Go [here](http://harelba.github.io/q/#examples) for more examples. | ||
|
||
## Python API | ||
A development branch for exposing q's capabilities as a <strong>Python module</strong> can be viewed <a href="https://github.com/harelba/q/tree/generic-injected-streams/PYTHON-API.markdown">here</a>, along with examples of the alpha version of the API.<br/>Existing functionality as a command-line tool will not be affected by this. Your input will be most appreciated. | ||
|
||
## Change log | ||
Click [here](http://harelba.github.io/q/changelog.html) to see the change log. | ||
|
||
## Contact | ||
Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. | ||
|
||
Harel Ben-Attia, [email protected], [@harelba](https://twitter.com/harelba) on Twitter | ||
Linkedin: [Harel Ben Attia](https://www.linkedin.com/in/harelba/) | ||
|
||
Twitter [@harelba](https://twitter.com/harelba) | ||
|
||
Email [[email protected]](mailto:[email protected]) | ||
|
||
q on twitter: #qtextasdata | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/usr/bin/env python | ||
|
||
q_version = '2.0.12' | ||
|
||
|
||
if __name__ == '__main__': | ||
print(q_version) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,8 +30,7 @@ | |
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
q_version = "2.0.6" | ||
from .__version__ import q_version | ||
|
||
__all__ = [ 'QTextAsData' ] | ||
|
||
|
@@ -476,16 +475,18 @@ def __init__(self, mode, expected_column_count, input_delimiter, skip_header=Fal | |
self.rows = [] | ||
self.skip_header = skip_header | ||
self.header_row = None | ||
self.header_row_filename = None | ||
self.expected_column_count = expected_column_count | ||
self.input_delimiter = input_delimiter | ||
self.disable_column_type_detection = disable_column_type_detection | ||
|
||
def analyze(self, col_vals): | ||
def analyze(self, filename, col_vals): | ||
if self.inferred: | ||
raise Exception("Already inferred columns") | ||
|
||
if self.skip_header and self.header_row is None: | ||
self.header_row = col_vals | ||
self.header_row_filename = filename | ||
else: | ||
self.rows.append(col_vals) | ||
|
||
|
@@ -905,17 +906,36 @@ def _pre_populate(self,dialect): | |
mfs = MaterializedFileState(filename,f,self.encoding,dialect,is_stdin) | ||
self.materialized_file_dict[filename] = mfs | ||
|
||
def _should_skip_extra_headers(self, filenumber, filename, mfs, col_vals): | ||
if not self.skip_header: | ||
return False | ||
|
||
if filenumber == 0: | ||
return False | ||
|
||
header_already_exists = self.column_inferer.header_row is not None | ||
|
||
is_extra_header = self.skip_header and mfs.lines_read == 1 and header_already_exists | ||
|
||
if is_extra_header: | ||
if tuple(self.column_inferer.header_row) != tuple(col_vals): | ||
raise BadHeaderException("Extra header {} in file {} mismatches original header {} from file {}. Table name is {}".format(",".join(col_vals),mfs.filename,",".join(self.column_inferer.header_row),self.column_inferer.header_row_filename,self.filenames_str)) | ||
|
||
return is_extra_header | ||
|
||
def _populate(self,dialect,stop_after_analysis=False): | ||
total_data_lines_read = 0 | ||
|
||
# For each match | ||
for filename in self.materialized_file_list: | ||
for filenumber,filename in enumerate(self.materialized_file_list): | ||
mfs = self.materialized_file_dict[filename] | ||
|
||
try: | ||
try: | ||
for col_vals in mfs.read_file_using_csv(): | ||
self._insert_row(col_vals) | ||
if self._should_skip_extra_headers(filenumber,filename,mfs,col_vals): | ||
continue | ||
self._insert_row(filename, col_vals) | ||
if stop_after_analysis and self.column_inferer.inferred: | ||
return | ||
if mfs.lines_read == 0 and self.skip_header: | ||
|
@@ -937,7 +957,7 @@ def _populate(self,dialect,stop_after_analysis=False): | |
|
||
if not self.table_created: | ||
self.column_inferer.force_analysis() | ||
self._do_create_table() | ||
self._do_create_table(filename) | ||
|
||
|
||
if total_data_lines_read == 0: | ||
|
@@ -960,20 +980,20 @@ def populate(self,dialect,stop_after_analysis=False): | |
self.state = TableCreatorState.FULLY_READ | ||
return | ||
|
||
def _flush_pre_creation_rows(self): | ||
def _flush_pre_creation_rows(self, filename): | ||
for i, col_vals in enumerate(self.pre_creation_rows): | ||
if self.skip_header and i == 0: | ||
# skip header line | ||
continue | ||
self._insert_row(col_vals) | ||
self._insert_row(filename, col_vals) | ||
self._flush_inserts() | ||
self.pre_creation_rows = [] | ||
|
||
def _insert_row(self, col_vals): | ||
def _insert_row(self, filename, col_vals): | ||
# If table has not been created yet | ||
if not self.table_created: | ||
# Try to create it along with another "example" line of data | ||
self.try_to_create_table(col_vals) | ||
self.try_to_create_table(filename, col_vals) | ||
|
||
# If the table is still not created, then we don't have enough data, just | ||
# store the data and return | ||
|
@@ -1069,19 +1089,19 @@ def _flush_inserts(self): | |
# print self.db.execute_and_fetch(self.db.generate_end_transaction()) | ||
self.buffered_inserts = [] | ||
|
||
def try_to_create_table(self, col_vals): | ||
def try_to_create_table(self, filename, col_vals): | ||
if self.table_created: | ||
raise Exception('Table is already created') | ||
|
||
# Add that line to the column inferer | ||
result = self.column_inferer.analyze(col_vals) | ||
result = self.column_inferer.analyze(filename, col_vals) | ||
# If inferer succeeded, | ||
if result: | ||
self._do_create_table() | ||
self._do_create_table(filename) | ||
else: | ||
pass # We don't have enough information for creating the table yet | ||
|
||
def _do_create_table(self): | ||
def _do_create_table(self,filename): | ||
# Then generate a temp table name | ||
self.table_name = self.db.generate_temp_table_name() | ||
# Get the column definition dict from the inferer | ||
|
@@ -1101,7 +1121,7 @@ def _do_create_table(self): | |
self.db.execute_and_fetch(create_table_stmt) | ||
# Mark the table as created | ||
self.table_created = True | ||
self._flush_pre_creation_rows() | ||
self._flush_pre_creation_rows(filename) | ||
|
||
def drop_table(self): | ||
if self.table_created: | ||
|
@@ -1122,7 +1142,8 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): | |
|
||
def print_credentials(): | ||
print("q version %s" % q_version, file=sys.stderr) | ||
print("Copyright (C) 2012-2017 Harel Ben-Attia ([email protected], @harelba on twitter)", file=sys.stderr) | ||
print("Python: %s" % " // ".join([str(x).strip() for x in sys.version.split("\n")]), file=sys.stderr) | ||
print("Copyright (C) 2012-2019 Harel Ben-Attia ([email protected], @harelba on twitter)", file=sys.stderr) | ||
print("http://harelba.github.io/q/", file=sys.stderr) | ||
print(file=sys.stderr) | ||
|
||
|
@@ -1403,7 +1424,7 @@ def _execute(self,query_str,input_params=None,stdin_file=None,stdin_filename='-' | |
msg = str(e) | ||
error = QError(e,"query error: %s" % msg,1) | ||
if "no such column" in msg and effective_input_params.skip_header: | ||
warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names')) | ||
warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names. Another issue might be that the file contains a BOM. Files that are encoded with UTF8 and contain a BOM can be read by specifying `-e utf-8-sig` in the command line. Support for non-UTF8 encoding will be provided in the future.')) | ||
except ColumnCountMismatchException as e: | ||
error = QError(e,e.msg,2) | ||
except (UnicodeDecodeError, UnicodeError) as e: | ||
|
Oops, something went wrong.