Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
harelba committed Aug 30, 2020
1 parent b2f8a0e commit 7abaab5
Show file tree
Hide file tree
Showing 34 changed files with 1,204 additions and 162 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ packages
dist/windows/
_benchmark_data*
*.benchmark-results
generated-site/
145 changes: 135 additions & 10 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,136 @@
language: python
python:
- "2.7"
- "3.6"
matrix:
sudo: false

# Names of the build stages; each job below selects one of them through its `stage:` key.
stages:
- integration
- release

env:
global:
# CACHE_NAME takes the value of the job's name (TRAVIS_JOB_NAME).
# NOTE(review): presumably this keeps the caches of different jobs apart - confirm against the Travis caching docs.
- CACHE_NAME=${TRAVIS_JOB_NAME}


# Holder key for YAML anchors. Each entry below names one shell command so that it
# can be referenced by alias (*_test, *_lint, ...) from the step and job definitions.
_commands_provider:

# Runs the Makefile's `test` target.
_test: &_test make test

# Runs the Makefile's `lint` target.
_lint: &_lint make lint

# Runs the Makefile's `local-release` target.
_release: &_release make local-release

# Runs the Makefile's `dep` target.
_install_requirements: &_install_requirements make dep

# https://ttcshelbyville.wordpress.com/2012/12/19/disable-remote-differential-compression-form-the-command-line/
_disable_windows_compression: &_disable_windows_compression "powershell Disable-WindowsOptionalFeature -Online -FeatureName MSRDC-Infrastructure"

# https://travis-ci.community/t/yarn-network-troubles/333/7
_disable_windows_defender: &_disable_windows_defender "powershell Set-MpPreference -DisableRealtimeMonitoring \\$true"


# Holder key for reusable groups of build phases. Jobs pull one of these groups in
# with a YAML merge key (`<<: *_step_test` or `<<: *_step_release`).
_steps_provider:

# Test flow: install dependencies, lint in before_script, then run the tests.
_test: &_step_test

install:
- *_install_requirements
before_script: *_lint
script: *_test

# Release flow: install dependencies, then run the release command (no lint phase).
_release: &_step_release

install: *_install_requirements
script: *_release



# Explicit job list. `integration` jobs merge in the test step group and `release`
# jobs merge in the release step group.
jobs:
include:
# NOTE(review): this entry has no `stage:`/`name:` and calls pip, flake8 and
# test/test-all directly instead of going through make - confirm it is still wanted.
- python: "3.7"
dist: xenial # Need for python 3.7
install: pip install -r requirements.txt
before_script: flake8 ./bin/q ./test/test-suite --count --select=E901,E999,F821,F822,F823 --show-source --statistics
script: PYTHONIOENCODING=UTF-8 test/test-all
# macOS integration jobs: generic language image; setup-pyenv.sh is sourced before install.
# NOTE(review): PYENV_VERSION is presumably read by setup-pyenv.sh to pick the interpreter - confirm.
- stage: integration
name: py27-macos
os: osx
language: generic
osx_image: xcode7.3
env:
- PYENV_VERSION=2.7.14
before_install: source setup-pyenv.sh
<<: *_step_test
cache:
directories:
- ${HOME}/.pyenv_cache

- stage: integration
name: py36-macos
os: osx
language: generic
osx_image: xcode7.3
env:
- PYENV_VERSION=3.6.4
before_install: source setup-pyenv.sh
<<: *_step_test
cache:
directories:
- ${HOME}/.pyenv_cache

- stage: integration
name: py37-macos
os: osx
language: generic
osx_image: xcode7.3
env:
- PYENV_VERSION=3.7.3
before_install: source setup-pyenv.sh
<<: *_step_test
cache:
directories:
- ${HOME}/.pyenv_cache

# Linux integration jobs: use the Python language support with an explicit version.
- stage: integration
name: py27-linux
language: python
python: "2.7"
<<: *_step_test

- stage: integration
name: py36-linux
language: python
python: "3.6"
<<: *_step_test

- stage: integration
name: py37-linux
language: python
dist: xenial
python: "3.7"
<<: *_step_test

# Release jobs: one per operating system, all on Python 3.7.
- stage: release
name: macos
os: osx
language: generic
osx_image: xcode7.3
env:
- PYENV_VERSION=3.7.3
before_install: source setup-pyenv.sh
<<: *_step_release
cache:
directories:
- ${HOME}/.pyenv_cache

- stage: release
name: linux
language: python
dist: xenial
python: "3.7"
<<: *_step_release

# Windows release job: before_install runs the two powershell commands aliased below,
# then installs make and Python 3.7.3 with choco; PATH is extended with /c/Python37.
- stage: release
name: windows
os: windows
language: shell
env:
- PATH=/c/Python37:/c/Python37/Scripts:$PATH
before_install:
- *_disable_windows_compression
- *_disable_windows_defender
- choco install make
- choco install python --version 3.7.3
<<: *_step_release
37 changes: 37 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
SHELL := /bin/bash

# Absolute path of the directory that contains this Makefile.
ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Project name: the last path component of ROOT_DIR.
# The earlier definition ran `dirname "$0"`, but Make expands `$0` itself (as the
# empty variable `0`) before the shell runs, so the result was always ".".
PROJECT_NAME := $(notdir $(ROOT_DIR))

# None of these targets creates a file named after itself. `test` additionally
# shares its name with the test/ directory used by its own recipe.
.PHONY: ci help dep lint test release local-release
.DEFAULT_GOAL := ci

ci: lint test ## Equivalent to 'make lint test'

help: ## Show this help message.
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

dep: ## Install the dependent libraries.
	pip install -r test-requirements.txt
	pip install -e .

lint: dep ## Run lint validations.
	flake8 q/ --count --select=E901,E999,F821,F822,F823 --show-source --statistics

test: dep ## Run the unit tests.
	test/test-all
## TODO Bring back pytest
## py.test -rs -c pytest.ini -s -v q/tests/suite.py --rootdir .

release: ## Run release
	pip install py-ci
	pyci release --no-wheel-publish --wheel-universal

local-release: ## Run the manual release script (do-manual-release.sh).
	pip install py-ci
	./do-manual-release.sh


13 changes: 7 additions & 6 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). It c
## Installation.
Extremely simple.

Instructions for all OSs are [here](http://harelba.github.io/q/install.html).
Instructions for all OSs are [here](http://harelba.github.io/q/#installation).

## Examples

Expand All @@ -20,18 +20,19 @@ q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3"
ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3"
```

Go [here](http://harelba.github.io/q/examples.html) for more examples.
Go [here](http://harelba.github.io/q/#examples) for more examples.

## Python API
A development branch for exposing q's capabilities as a <strong>Python module</strong> can be viewed <a href="https://github.com/harelba/q/tree/generic-injected-streams/PYTHON-API.markdown">here</a>, along with examples of the alpha version of the API.<br/>Existing functionality as a command-line tool will not be affected by this. Your input will be most appreciated.

## Change log
Click [here](http://harelba.github.io/q/changelog.html) to see the change log.

## Contact
Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course.

Harel Ben-Attia, [email protected], [@harelba](https://twitter.com/harelba) on Twitter
LinkedIn: [Harel Ben Attia](https://www.linkedin.com/in/harelba/)

Twitter [@harelba](https://twitter.com/harelba)

Email [[email protected]](mailto:[email protected])

q on twitter: #qtextasdata

7 changes: 7 additions & 0 deletions bin/__version__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env python
# Holds the version string of q in one place.
# Can also be executed as a script, in which case it prints the version.

q_version = '2.0.12'


if __name__ == '__main__':
    # Executed directly (not imported): write the bare version string to stdout.
    print(q_version)
55 changes: 38 additions & 17 deletions bin/q → bin/q.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

q_version = "2.0.6"
from .__version__ import q_version

__all__ = [ 'QTextAsData' ]

Expand Down Expand Up @@ -476,16 +475,18 @@ def __init__(self, mode, expected_column_count, input_delimiter, skip_header=Fal
self.rows = []
self.skip_header = skip_header
self.header_row = None
self.header_row_filename = None
self.expected_column_count = expected_column_count
self.input_delimiter = input_delimiter
self.disable_column_type_detection = disable_column_type_detection

def analyze(self, col_vals):
def analyze(self, filename, col_vals):
if self.inferred:
raise Exception("Already inferred columns")

if self.skip_header and self.header_row is None:
self.header_row = col_vals
self.header_row_filename = filename
else:
self.rows.append(col_vals)

Expand Down Expand Up @@ -905,17 +906,36 @@ def _pre_populate(self,dialect):
mfs = MaterializedFileState(filename,f,self.encoding,dialect,is_stdin)
self.materialized_file_dict[filename] = mfs

def _should_skip_extra_headers(self, filenumber, filename, mfs, col_vals):
if not self.skip_header:
return False

if filenumber == 0:
return False

header_already_exists = self.column_inferer.header_row is not None

is_extra_header = self.skip_header and mfs.lines_read == 1 and header_already_exists

if is_extra_header:
if tuple(self.column_inferer.header_row) != tuple(col_vals):
raise BadHeaderException("Extra header {} in file {} mismatches original header {} from file {}. Table name is {}".format(",".join(col_vals),mfs.filename,",".join(self.column_inferer.header_row),self.column_inferer.header_row_filename,self.filenames_str))

return is_extra_header

def _populate(self,dialect,stop_after_analysis=False):
total_data_lines_read = 0

# For each match
for filename in self.materialized_file_list:
for filenumber,filename in enumerate(self.materialized_file_list):
mfs = self.materialized_file_dict[filename]

try:
try:
for col_vals in mfs.read_file_using_csv():
self._insert_row(col_vals)
if self._should_skip_extra_headers(filenumber,filename,mfs,col_vals):
continue
self._insert_row(filename, col_vals)
if stop_after_analysis and self.column_inferer.inferred:
return
if mfs.lines_read == 0 and self.skip_header:
Expand All @@ -937,7 +957,7 @@ def _populate(self,dialect,stop_after_analysis=False):

if not self.table_created:
self.column_inferer.force_analysis()
self._do_create_table()
self._do_create_table(filename)


if total_data_lines_read == 0:
Expand All @@ -960,20 +980,20 @@ def populate(self,dialect,stop_after_analysis=False):
self.state = TableCreatorState.FULLY_READ
return

def _flush_pre_creation_rows(self):
def _flush_pre_creation_rows(self, filename):
for i, col_vals in enumerate(self.pre_creation_rows):
if self.skip_header and i == 0:
# skip header line
continue
self._insert_row(col_vals)
self._insert_row(filename, col_vals)
self._flush_inserts()
self.pre_creation_rows = []

def _insert_row(self, col_vals):
def _insert_row(self, filename, col_vals):
# If table has not been created yet
if not self.table_created:
# Try to create it along with another "example" line of data
self.try_to_create_table(col_vals)
self.try_to_create_table(filename, col_vals)

# If the table is still not created, then we don't have enough data, just
# store the data and return
Expand Down Expand Up @@ -1069,19 +1089,19 @@ def _flush_inserts(self):
# print self.db.execute_and_fetch(self.db.generate_end_transaction())
self.buffered_inserts = []

def try_to_create_table(self, col_vals):
def try_to_create_table(self, filename, col_vals):
if self.table_created:
raise Exception('Table is already created')

# Add that line to the column inferer
result = self.column_inferer.analyze(col_vals)
result = self.column_inferer.analyze(filename, col_vals)
# If inferer succeeded,
if result:
self._do_create_table()
self._do_create_table(filename)
else:
pass # We don't have enough information for creating the table yet

def _do_create_table(self):
def _do_create_table(self,filename):
# Then generate a temp table name
self.table_name = self.db.generate_temp_table_name()
# Get the column definition dict from the inferer
Expand All @@ -1101,7 +1121,7 @@ def _do_create_table(self):
self.db.execute_and_fetch(create_table_stmt)
# Mark the table as created
self.table_created = True
self._flush_pre_creation_rows()
self._flush_pre_creation_rows(filename)

def drop_table(self):
if self.table_created:
Expand All @@ -1122,7 +1142,8 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter):

def print_credentials():
print("q version %s" % q_version, file=sys.stderr)
print("Copyright (C) 2012-2017 Harel Ben-Attia ([email protected], @harelba on twitter)", file=sys.stderr)
print("Python: %s" % " // ".join([str(x).strip() for x in sys.version.split("\n")]), file=sys.stderr)
print("Copyright (C) 2012-2019 Harel Ben-Attia ([email protected], @harelba on twitter)", file=sys.stderr)
print("http://harelba.github.io/q/", file=sys.stderr)
print(file=sys.stderr)

Expand Down Expand Up @@ -1403,7 +1424,7 @@ def _execute(self,query_str,input_params=None,stdin_file=None,stdin_filename='-'
msg = str(e)
error = QError(e,"query error: %s" % msg,1)
if "no such column" in msg and effective_input_params.skip_header:
warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names'))
# Hint for "no such column" errors in header mode. Python's BOM-aware UTF-8 codec is
# named "utf-8-sig"; the message used to recommend a non-existent "utf-9-sig".
warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names. Another issue might be that the file contains a BOM. Files that are encoded with UTF8 and contain a BOM can be read by specifying `-e utf-8-sig` in the command line. Support for non-UTF8 encoding will be provided in the future.'))
except ColumnCountMismatchException as e:
error = QError(e,e.msg,2)
except (UnicodeDecodeError, UnicodeError) as e:
Expand Down
Loading

0 comments on commit 7abaab5

Please sign in to comment.