split pymseed functionality into several files

Branch: ims_to_stationxml
Sebastian Heimann, 11 years ago
commit 1e26f3b939
11 changed files with 1488 additions and 0 deletions
  1. BIN      libmseed-2.1.3.tar.gz
  2. +0 -0    pyrocko/__init__.py
  3. +3 -0    pyrocko/config.py
  4. +51 -0   pyrocko/io.py
  5. +107 -0  pyrocko/mseed.py
  6. +304 -0  pyrocko/mseed_ext.c
  7. +439 -0  pyrocko/pile.py
  8. +259 -0  pyrocko/sac.py
  9. +224 -0  pyrocko/trace.py
  10. +86 -0  pyrocko/util.py
  11. +15 -0  setup.py

BIN  libmseed-2.1.3.tar.gz

+0 -0  pyrocko/__init__.py


+3 -0  pyrocko/config.py

@@ -0,0 +1,3 @@
show_progress = True

+51 -0  pyrocko/io.py

@@ -0,0 +1,51 @@
import os

import mseed, sac
import trace
from pyrocko.mseed_ext import MSEEDERROR


class FileLoadError(Exception):
    pass


def make_substitutions(tr, substitutions):
    if substitutions:
        for k, v in substitutions.iteritems():
            if hasattr(tr, k):
                setattr(tr, k, v)


def load(filename, format='mseed', getdata=True, substitutions=None):
    trs = []
    if format == 'mseed':
        try:
            for tr in mseed.load(filename, getdata):
                trs.append(tr)
        except (OSError, MSEEDERROR), e:
            raise FileLoadError(e)

    elif format == 'sac':
        mtime = os.stat(filename)[8]
        try:
            sacf = sac.SacFile(filename, get_data=getdata)  # named sacf to avoid shadowing the sac module
            tr = sacf.to_trace()
            tr.set_mtime(mtime)
            trs.append(tr)
        except (OSError, sac.SacError), e:
            raise FileLoadError(e)

    for tr in trs:
        make_substitutions(tr, substitutions)

    return trs


def save(traces, filename_template, format='mseed'):
    if format == 'mseed':
        mseed.save(traces, filename_template)

    elif format == 'sac':
        for tr in traces:
            f = sac.SacFile(from_trace=tr)
            f.write(tr.fill_template(filename_template))
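
A minimal usage sketch for this module (assuming a MiniSEED file 'data.mseed' exists and the script runs next to the pyrocko modules, since they use implicit relative imports; '%(station)s' is expanded per trace by Trace.fill_template):

import io

traces = io.load('data.mseed', format='mseed')   # list of trace.Trace objects
for tr in traces:
    print tr
io.save(traces, '/tmp/out_%(station)s.mseed')    # groups traces by expanded filename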

+107 -0  pyrocko/mseed.py

@@ -0,0 +1,107 @@
import os

import pyrocko.mseed_ext
from pyrocko.mseed_ext import HPTMODULUS, MSEEDERROR
import trace
from util import reuse


def load(filename, getdata=True):
    mtime = os.stat(filename)[8]
    traces = []
    for tr in pyrocko.mseed_ext.get_traces(filename, getdata):
        network, station, location, channel = tr[1:5]
        tmin = float(tr[5])/float(HPTMODULUS)
        tmax = float(tr[6])/float(HPTMODULUS)
        deltat = reuse(1.0/float(tr[7]))
        ydata = tr[8]

        traces.append(trace.Trace(network, station, location, channel,
                                  tmin, tmax, deltat, ydata, mtime=mtime))

    return traces


def as_tuple(tr):
    itmin = int(round(tr.tmin*HPTMODULUS))
    itmax = int(round(tr.tmax*HPTMODULUS))
    srate = 1.0/tr.deltat
    return (tr.network, tr.station, tr.location, tr.channel,
            itmin, itmax, srate, tr.get_ydata())


def save(traces, filename_template):
    fn_tr = {}
    for tr in traces:
        fn = tr.fill_template(filename_template)
        if fn not in fn_tr:
            fn_tr[fn] = []
        fn_tr[fn].append(tr)

    for fn, traces_thisfile in fn_tr.items():
        trtups = []
        traces_thisfile.sort(lambda a, b: cmp(a.full_id, b.full_id))
        for tr in traces_thisfile:
            trtups.append(as_tuple(tr))
        pyrocko.mseed_ext.store_traces(trtups, fn)

    return fn_tr.keys()


if __name__ == '__main__':
    import unittest
    import numpy as num
    import time
    import tempfile
    import random
    from random import choice as rc
    from os.path import join as pjoin

    abc = 'abcdefghijklmnopqrstuvwxyz'

    def rn(n):
        return ''.join([random.choice(abc) for i in xrange(n)])

    class MSeedTestCase(unittest.TestCase):

        def testWriteRead(self):
            now = time.time()
            n = 10
            deltat = 0.1
            networks = [rn(2) for i in range(5)]
            traces1 = [trace.Trace(rc(networks), rn(4), rn(2), rn(3),
                                   tmin=now+i*deltat*n*2, deltat=deltat,
                                   ydata=num.arange(n), mtime=now)
                       for i in range(100)]
            tempdir = tempfile.mkdtemp()
            fns = save(traces1, pjoin(tempdir, '%(network)s'))
            traces2 = []
            for fn in fns:
                traces2.extend(load(fn))
            for tr in traces1:
                assert tr in traces2
            for fn in fns:
                os.remove(fn)

        def testReadNonexistent(self):
            try:
                trs = load('/tmp/thisfileshouldnotexist')
            except OSError, e:
                pass
            assert isinstance(e, OSError)

        def testReadEmpty(self):
            tempfn = tempfile.mkstemp()[1]
            try:
                trs = load(tempfn)
            except MSEEDERROR, e:
                pass
            assert str(e).find('No SEED data detected') != -1
            os.remove(tempfn)

    unittest.main()
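
For reference, a sketch of the high-precision time convention used by load() and as_tuple() above (HPTMODULUS is exported by mseed_ext; in libmseed it is the number of high-precision ticks per second):

from pyrocko.mseed_ext import HPTMODULUS

t_unix = 1234567890.25                   # seconds since epoch, as used by trace.Trace
t_hpt = int(round(t_unix * HPTMODULUS))  # integer high-precision time, as stored in MiniSEED
assert abs(t_hpt/float(HPTMODULUS) - t_unix) < 1.0/HPTMODULUS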

+304 -0  pyrocko/mseed_ext.c

@@ -0,0 +1,304 @@
/* Copyright (c) 2009, Sebastian Heimann <sebastian.heimann@zmaw.de>

   This file is part of pyrocko. For licensing information please see the file
   COPYING which is included with pyrocko. */

#include "Python.h"
#include "numpy/arrayobject.h"
#include <libmseed.h>
#include <assert.h>

static PyObject *MseedError;

#define BUFSIZE 1024

static PyObject*
mseed_get_traces (PyObject *dummy, PyObject *args)
{
    char *filename;
    MSTraceGroup *mstg = NULL;
    MSTrace *mst = NULL;
    int retcode;
    npy_intp array_dims[1] = {0};
    PyObject *array = NULL;
    PyObject *out_traces = NULL;
    PyObject *out_trace = NULL;
    int numpytype;
    char strbuf[BUFSIZE];
    PyObject *unpackdata = NULL;

    if (!PyArg_ParseTuple(args, "sO", &filename, &unpackdata)) {
        PyErr_SetString(MseedError, "usage get_traces(filename, dataflag)");
        return NULL;
    }

    if (!PyBool_Check(unpackdata)) {
        PyErr_SetString(MseedError, "Second argument must be a boolean");
        return NULL;
    }

    /* get data from mseed file */
    retcode = ms_readtraces (&mstg, filename, 0, -1.0, -1.0, 0, 1,
                             (unpackdata == Py_True), 0);
    if (retcode < 0) {
        snprintf (strbuf, BUFSIZE, "Cannot read file '%s': %s",
                  filename, ms_errorstr(retcode));
        PyErr_SetString(MseedError, strbuf);
        return NULL;
    }

    if (!mstg) {
        snprintf (strbuf, BUFSIZE, "Error reading file");
        PyErr_SetString(MseedError, strbuf);
        return NULL;
    }

    /* check that there is data in the traces */
    if (unpackdata == Py_True) {
        mst = mstg->traces;
        while (mst) {
            if (mst->datasamples == NULL) {
                snprintf (strbuf, BUFSIZE, "Error reading file - datasamples is NULL");
                PyErr_SetString(MseedError, strbuf);
                return NULL;
            }
            mst = mst->next;
        }
    }

    out_traces = Py_BuildValue("[]");

    mst = mstg->traces;

    /* convert data to python tuple */
    while (mst) {

        if (unpackdata == Py_True) {
            array_dims[0] = mst->numsamples;
            switch (mst->sampletype) {
                case 'i':
                    assert(ms_samplesize('i') == 4);
                    numpytype = NPY_INT32;
                    break;
                case 'a':
                    assert(ms_samplesize('a') == 1);
                    numpytype = NPY_INT8;
                    break;
                case 'f':
                    assert(ms_samplesize('f') == 4);
                    numpytype = NPY_FLOAT32;
                    break;
                case 'd':
                    assert(ms_samplesize('d') == 8);
                    numpytype = NPY_FLOAT64;
                    break;
                default:
                    snprintf (strbuf, BUFSIZE, "Unknown sampletype %c\n", mst->sampletype);
                    PyErr_SetString(MseedError, strbuf);
                    Py_XDECREF(out_traces);
                    return NULL;
            }
            array = PyArray_SimpleNew(1, array_dims, numpytype);
            memcpy(PyArray_DATA((PyArrayObject*)array), mst->datasamples,
                   mst->numsamples * ms_samplesize(mst->sampletype));
        } else {
            Py_INCREF(Py_None);
            array = Py_None;
        }

        out_trace = Py_BuildValue("(c,s,s,s,s,L,L,d,N)",
                                  mst->dataquality,
                                  mst->network,
                                  mst->station,
                                  mst->location,
                                  mst->channel,
                                  mst->starttime,
                                  mst->endtime,
                                  mst->samprate,
                                  array);
        PyList_Append(out_traces, out_trace);
        Py_DECREF(out_trace);
        mst = mst->next;
    }

    mst_freegroup (&mstg);
    return out_traces;
}

static void record_handler (char *record, int reclen, void *outfile) {
    if (fwrite(record, reclen, 1, (FILE*)outfile) != 1) {
        fprintf(stderr, "Error writing mseed record to output file\n");
    }
}

static PyObject*
mseed_store_traces (PyObject *dummy, PyObject *args)
{
    char *filename;
    MSTrace *mst = NULL;
    PyObject *array = NULL;
    PyObject *in_traces = NULL;
    PyObject *in_trace = NULL;
    PyArrayObject *contiguous_array = NULL;
    int i;
    char *network, *station, *location, *channel;
    char mstype;
    int msdetype;
    int psamples, precords;
    int numpytype;
    int length;
    FILE *outfile;

    if (!PyArg_ParseTuple(args, "Os", &in_traces, &filename)) {
        PyErr_SetString(MseedError, "usage store_traces(traces, filename)");
        return NULL;
    }

    if (!PySequence_Check(in_traces)) {
        PyErr_SetString(MseedError, "Traces is not of sequence type.");
        return NULL;
    }

    outfile = fopen(filename, "w");
    if (outfile == NULL) {
        PyErr_SetString(MseedError, "Error opening file.");
        return NULL;
    }

    for (i = 0; i < PySequence_Length(in_traces); i++) {

        in_trace = PySequence_GetItem(in_traces, i);
        if (!PyTuple_Check(in_trace)) {
            PyErr_SetString(MseedError, "Trace record must be a tuple of (network, station, location, channel, starttime, endtime, samprate, data).");
            Py_DECREF(in_trace);
            return NULL;
        }

        mst = mst_init (NULL);

        if (!PyArg_ParseTuple(in_trace, "ssssLLdO",
                              &network,
                              &station,
                              &location,
                              &channel,
                              &(mst->starttime),
                              &(mst->endtime),
                              &(mst->samprate),
                              &array)) {
            PyErr_SetString(MseedError, "Trace record must be a tuple of (network, station, location, channel, starttime, endtime, samprate, data).");
            mst_free(&mst);
            Py_DECREF(in_trace);
            return NULL;
        }

        strncpy(mst->network, network, 10);
        strncpy(mst->station, station, 10);
        strncpy(mst->location, location, 10);
        strncpy(mst->channel, channel, 10);
        mst->network[10] = '\0';
        mst->station[10] = '\0';
        mst->location[10] = '\0';
        mst->channel[10] = '\0';

        if (!PyArray_Check(array)) {
            PyErr_SetString(MseedError, "Data must be given as NumPy array.");
            mst_free(&mst);
            Py_DECREF(in_trace);
            return NULL;
        }

        numpytype = PyArray_TYPE((PyArrayObject*)array);
        switch (numpytype) {
            case NPY_INT32:
                assert(ms_samplesize('i') == 4);
                mstype = 'i';
                msdetype = DE_STEIM1;
                break;
            case NPY_INT8:
                assert(ms_samplesize('a') == 1);
                mstype = 'a';
                msdetype = DE_ASCII;
                break;
            case NPY_FLOAT32:
                assert(ms_samplesize('f') == 4);
                mstype = 'f';
                msdetype = DE_FLOAT32;
                break;
            case NPY_FLOAT64:
                assert(ms_samplesize('d') == 8);
                mstype = 'd';
                msdetype = DE_FLOAT64;
                break;
            default:
                PyErr_SetString(MseedError, "Data must be of type float64, float32, int32 or int8.");
                mst_free(&mst);
                Py_DECREF(in_trace);
                return NULL;
        }
        mst->sampletype = mstype;

        contiguous_array = PyArray_GETCONTIGUOUS((PyArrayObject*)array);

        length = PyArray_SIZE(contiguous_array);
        mst->numsamples = length;
        mst->samplecnt = length;

        mst->datasamples = calloc(length, ms_samplesize(mstype));
        memcpy(mst->datasamples, PyArray_DATA(contiguous_array),
               length*ms_samplesize(mstype));
        Py_DECREF(contiguous_array);

        precords = mst_pack (mst, &record_handler, outfile, 4096, msdetype,
                             1, &psamples, 1, 0, NULL);

        mst_free(&mst);
        Py_DECREF(in_trace);
    }

    fclose(outfile);

    Py_INCREF(Py_None);
    return Py_None;
}

static PyMethodDef MSEEDMethods[] = {
    {"get_traces", mseed_get_traces, METH_VARARGS,
     "get_traces(filename, dataflag)\n"
     "Get all traces stored in an mseed file.\n\n"
     "Returns a list of tuples, one tuple for each trace in the file. Each tuple\n"
     "has 9 elements:\n\n"
     "  (dataquality, network, station, location, channel,\n"
     "   starttime, endtime, samprate, data)\n\n"
     "These come straight from the MSTrace data structure, defined and described\n"
     "in libmseed. If dataflag is True, `data` is a numpy array containing the\n"
     "data. If dataflag is False, the data is not unpacked and `data` is None.\n" },

    {"store_traces", mseed_store_traces, METH_VARARGS,
     "store_traces(traces, filename)\n"
     "Store a list of trace tuples (network, station, location, channel,\n"
     "starttime, endtime, samprate, data) to a MiniSEED file.\n" },

    {NULL, NULL, 0, NULL}  /* Sentinel */
};

PyMODINIT_FUNC
initmseed_ext(void)
{
    PyObject *m;
    PyObject *hptmodulus;

    m = Py_InitModule("mseed_ext", MSEEDMethods);
    if (m == NULL) return;

    import_array();

    MseedError = PyErr_NewException("mseed_ext.error", NULL, NULL);
    Py_INCREF(MseedError);  /* required, because other code could remove
                               `MSEEDERROR` from the module, which would
                               create a dangling pointer. */
    PyModule_AddObject(m, "MSEEDERROR", MseedError);

    hptmodulus = Py_BuildValue("i", HPTMODULUS);
    /* no incref here because `hptmodulus` is not needed
       in the C code and it can safely be removed from
       the module. */
    PyModule_AddObject(m, "HPTMODULUS", hptmodulus);
}
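
A sketch of calling the extension directly from Python (the tuple layout follows the get_traces docstring above; 'data.mseed' is a placeholder filename):

import pyrocko.mseed_ext as mseed_ext

for tup in mseed_ext.get_traces('data.mseed', True):
    dataquality, network, station, location, channel, \
        starttime, endtime, samprate, data = tup
    print network, station, channel, samprate, len(data)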

+439 -0  pyrocko/pile.py

@@ -0,0 +1,439 @@
import os
import re
import sys
import logging
import pickle

import progressbar

import trace, io, util, config
from util import reuse
from trace import t2ind   # assumed helper, defined in trace.py
import numpy as num


class TracesGroup(object):

    def __init__(self):
        self.empty()

    def empty(self):
        self.networks, self.stations, self.locations, self.channels, self.nslc_ids = \
            [set() for x in range(5)]
        self.tmin, self.tmax = num.inf, -num.inf

    def update_from_content(self, content):
        self.empty()
        for c in content:
            if isinstance(c, TracesGroup):
                self.networks.update(c.networks)
                self.stations.update(c.stations)
                self.locations.update(c.locations)
                self.channels.update(c.channels)
                self.nslc_ids.update(c.nslc_ids)
            elif isinstance(c, trace.Trace):
                self.networks.add(c.network)
                self.stations.add(c.station)
                self.locations.add(c.location)
                self.channels.add(c.channel)
                self.nslc_ids.add(c.nslc_id)

            self.tmin = min(self.tmin, c.tmin)
            self.tmax = max(self.tmax, c.tmax)

        self._convert_small_sets_to_tuples()

    def overlaps(self, tmin, tmax):
        return not (tmax < self.tmin or self.tmax < tmin)

    def is_relevant(self, tmin, tmax, selector=None):
        return not (tmax <= self.tmin or self.tmax < tmin) and \
            (selector is None or selector(self))

    def _convert_small_sets_to_tuples(self):
        if len(self.networks) < 32:
            self.networks = reuse(tuple(self.networks))
        if len(self.stations) < 32:
            self.stations = reuse(tuple(self.stations))
        if len(self.locations) < 32:
            self.locations = reuse(tuple(self.locations))
        if len(self.channels) < 32:
            self.channels = reuse(tuple(self.channels))
        if len(self.nslc_ids) < 32:
            self.nslc_ids = reuse(tuple(self.nslc_ids))


class TracesFile(TracesGroup):

    def __init__(self, abspath, format, substitutions=None, mtime=None):
        self.abspath = abspath
        self.format = format
        self.traces = []
        self.data_loaded = False
        self.substitutions = substitutions
        self.load_headers(mtime=mtime)

    def load_headers(self, mtime=None):
        if mtime is None:
            mtime = os.stat(self.abspath)[8]
        self.mtime = mtime
        for tr in io.load(self.abspath, format=self.format, getdata=False,
                          substitutions=self.substitutions):
            self.traces.append(tr)
        self.data_loaded = False
        self.update_from_content(self.traces)

    def load_data(self):
        logging.info('loading data from file: %s' % self.abspath)
        self.traces = []
        for tr in io.load(self.abspath, format=self.format, getdata=True,
                          substitutions=self.substitutions):
            self.traces.append(tr)
        self.data_loaded = True
        self.update_from_content(self.traces)

    def drop_data(self):
        logging.info('forgetting data of file: %s' % self.abspath)
        for tr in self.traces:
            tr.drop_data()
        self.data_loaded = False

    def reload_if_modified(self):
        mtime = os.stat(self.abspath)[8]
        if mtime != self.mtime:
            self.mtime = mtime
            if self.data_loaded:
                self.load_data()
            else:
                self.load_headers()

    def chop(self, tmin, tmax, selector):
        chopped = []
        for tr in self.traces:
            if selector(tr):
                try:
                    chopped_trace = tr.chop(tmin, tmax, inplace=False)
                except trace.NoData:
                    continue
                chopped_trace.mtime = self.mtime
                chopped.append(chopped_trace)
        return chopped

    def get_deltats(self):
        deltats = set()
        for tr in self.traces:
            deltats.add(tr.deltat)
        return deltats

    def iter_traces(self):
        for tr in self.traces:
            yield tr

    def gather_keys(self, gather):
        keys = set()
        for tr in self.traces:
            keys.add(gather(tr))
        return keys

    def __str__(self):
        def sl(s):
            return sorted(list(s))

        s = 'TracesFile\n'
        s += 'abspath: %s\n' % self.abspath
        s += 'file mtime: %s\n' % util.gmctime(self.mtime)
        s += 'number of traces: %i\n' % len(self.traces)
        s += 'timerange: %s - %s\n' % (util.gmctime(self.tmin), util.gmctime(self.tmax))
        s += 'networks: %s\n' % ', '.join(sl(self.networks))
        s += 'stations: %s\n' % ', '.join(sl(self.stations))
        s += 'locations: %s\n' % ', '.join(sl(self.locations))
        s += 'channels: %s\n' % ', '.join(sl(self.channels))
        return s


# minimal progress and pluralization helpers (referenced below but not
# included in this commit)

def progress_beg(message):
    if config.show_progress:
        sys.stderr.write(message)


def progress_end():
    if config.show_progress:
        sys.stderr.write(' done\n')


def plural_s(n):
    return n != 1 and 's' or ''


def load_cache(cachefilename):
    if os.path.isfile(cachefilename):
        progress_beg('reading cache...')
        f = open(cachefilename, 'r')
        cache = pickle.load(f)
        f.close()
        progress_end()
    else:
        cache = {}

    # weed out files which no longer exist
    progress_beg('weeding cache...')
    for fn in cache.keys():
        if not os.path.isfile(fn):
            del cache[fn]
    progress_end()

    return cache


def dump_cache(cache, cachefilename):
    progress_beg('writing cache...')
    f = open(cachefilename+'.tmp', 'w')
    pickle.dump(cache, f)
    f.close()
    os.rename(cachefilename+'.tmp', cachefilename)
    progress_end()


class FilenameAttributeError(Exception):
    pass


class Pile(TracesGroup):

    def __init__(self, filenames, cachefilename=None, filename_attributes=None,
                 format='mseed'):  # format parameter added; TracesFile needs it

        msfiles = []
        if filenames:
            # should lock cache here...
            if cachefilename:
                cache = load_cache(cachefilename)
            else:
                cache = {}

            if config.show_progress:
                widgets = ['Scanning files', ' ',
                           progressbar.Bar(marker='-', left='[', right=']'), ' ',
                           progressbar.Percentage(), ' ']
                pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(filenames)).start()

            regex = None
            if filename_attributes:
                regex = re.compile(filename_attributes)

            failures = []
            cache_modified = False
            for ifile, filename in enumerate(filenames):
                try:
                    abspath = os.path.abspath(filename)
                    substitutions = None
                    if regex:
                        m = regex.search(filename)
                        if not m:
                            raise FilenameAttributeError(
                                "Cannot get attributes with pattern '%s' from path '%s'"
                                % (filename_attributes, filename))
                        substitutions = m.groupdict()

                    mtime = os.stat(filename)[8]
                    if abspath not in cache or cache[abspath].mtime != mtime or substitutions:
                        cache[abspath] = TracesFile(abspath, format,
                                                    substitutions=substitutions, mtime=mtime)
                        if not substitutions:
                            cache_modified = True

                except (io.FileLoadError, OSError, FilenameAttributeError), xerror:
                    failures.append(abspath)
                    logging.warn(xerror)
                else:
                    msfiles.append(cache[abspath])

                if config.show_progress:
                    pbar.update(ifile+1)

            if config.show_progress:
                pbar.finish()

            if failures:
                logging.warn('The following file%s caused problems and will be ignored:\n'
                             % plural_s(len(failures)) + '\n'.join(failures))

            if cachefilename and cache_modified:
                dump_cache(cache, cachefilename)
            # should unlock cache here...

        self.msfiles = msfiles
        self.update_from_content(self.msfiles)
        self.open_files = set()

    def chopper(self, tmin=None, tmax=None, tinc=None, tpad=0., selector=None,
                want_incomplete=True, degap=True, keep_current_files_open=False,
                ndecimate=None):

        if tmin is None:
            tmin = self.tmin+tpad
        if tmax is None:
            tmax = self.tmax-tpad
        if tinc is None:
            tinc = tmax-tmin

        if not self.is_relevant(tmin, tmax, selector):
            return

        files_match_full = [f for f in self.msfiles
                            if f.is_relevant(tmin-tpad, tmax+tpad, selector)]
        if not files_match_full:
            return

        ftmin = num.inf
        ftmax = -num.inf
        for f in files_match_full:
            ftmin = min(ftmin, f.tmin)
            ftmax = max(ftmax, f.tmax)

        iwin = max(0, int(((ftmin-tpad)-tmin)/tinc-2))
        files_match_partial = files_match_full

        partial_every = 50

        while True:
            chopped = []
            wmin, wmax = tmin+iwin*tinc, tmin+(iwin+1)*tinc
            if wmin >= ftmax or wmin >= tmax:
                break

            if iwin % partial_every == 0:  # optimization
                swmin, swmax = tmin+iwin*tinc, tmin+(iwin+partial_every)*tinc
                files_match_partial = [f for f in files_match_full
                                       if f.is_relevant(swmin-tpad, swmax+tpad, selector)]

            files_match_win = [f for f in files_match_partial
                               if f.is_relevant(wmin-tpad, wmax+tpad, selector)]

            used_files = set()
            if files_match_win:
                for file in files_match_win:
                    used_files.add(file)
                    if not file.data_loaded:
                        self.open_files.add(file)
                        file.load_data()
                    chopped.extend(file.chop(wmin-tpad, wmax+tpad, selector))

            chopped.sort(lambda a, b: cmp(a.full_id, b.full_id))
            if degap:
                # degapper (joining of adjacent traces) is referenced here but
                # not included in this commit
                chopped = degapper(chopped)

            if not want_incomplete:
                wlen = (wmax+tpad)-(wmin-tpad)
                chopped_weeded = []
                for tr in chopped:
                    if abs(wlen - round(wlen/tr.deltat)*tr.deltat) > 0.001:
                        logging.warn('Selected window length (%g) not nicely divisible '
                                     'by sampling interval (%g).' % (wlen, tr.deltat))
                    if len(tr.ydata) == t2ind((wmax+tpad)-(wmin-tpad), tr.deltat):
                        chopped_weeded.append(tr)
                chopped = chopped_weeded

            if ndecimate is not None:
                for tr in chopped:
                    tr.downsample(ndecimate)

            yield chopped

            unused_files = self.open_files - used_files
            for file in unused_files:
                file.drop_data()
                self.open_files.remove(file)

            iwin += 1

        if not keep_current_files_open:
            while self.open_files:
                file = self.open_files.pop()
                file.drop_data()

    def all(self, *args, **kwargs):
        alltraces = []
        for traces in self.chopper(*args, **kwargs):
            alltraces.extend(traces)
        return alltraces

    def iter_all(self, *args, **kwargs):
        for traces in self.chopper(*args, **kwargs):
            for tr in traces:
                yield tr

    def gather_keys(self, gather):
        keys = set()
        for file in self.msfiles:
            keys |= file.gather_keys(gather)
        return sorted(keys)

    def get_deltats(self):
        deltats = set()
        for file in self.msfiles:
            deltats.update(file.get_deltats())
        return sorted(list(deltats))

    def iter_traces(self, load_data=False, return_abspath=False):
        for file in self.msfiles:
            must_close = False
            if load_data and not file.data_loaded:
                file.load_data()
                must_close = True

            for tr in file.iter_traces():
                if return_abspath:
                    yield file.abspath, tr
                else:
                    yield tr

            if must_close:
                file.drop_data()

    def reload_modified(self):
        for file in self.msfiles:
            file.reload_if_modified()

    def __str__(self):
        def sl(s):
            return sorted(list(s))

        s = 'Pile\n'
        s += 'number of files: %i\n' % len(self.msfiles)
        s += 'timerange: %s - %s\n' % (util.gmctime(self.tmin), util.gmctime(self.tmax))
        s += 'networks: %s\n' % ', '.join(sl(self.networks))
        s += 'stations: %s\n' % ', '.join(sl(self.stations))
        s += 'locations: %s\n' % ', '.join(sl(self.locations))
        s += 'channels: %s\n' % ', '.join(sl(self.channels))
        return s


if __name__ == '__main__':
    import unittest
    import tempfile
    import random
    import shutil
    from random import choice as rc
    from os.path import join as pjoin
    import numpy as num

    abc = 'abcdefghijklmnopqrstuvwxyz'

    def rn(n):
        return ''.join([random.choice(abc) for i in xrange(n)])

    def makeManyFiles(nfiles=200, nsamples=100000):
        stations = [rn(4) for i in xrange(10)]
        components = [rn(3) for i in xrange(3)]
        networks = ['xx']

        datadir = tempfile.mkdtemp()
        traces = []
        for i in xrange(nfiles):
            tmin = 1234567890+i*60*60*24*10  # random.randint(1,int(time.time()))
            deltat = 1.0
            data = num.ones(nsamples)
            traces.append(trace.Trace(rc(networks), rc(stations), '', rc(components),
                                      tmin, None, deltat, data))

        fnt = pjoin(datadir, '%(network)s-%(station)s-%(location)s-%(channel)s-%(tmin)s.mseed')
        io.save(traces, fnt)
        return datadir

    class PileTestCase(unittest.TestCase):

        def testPileTraversal(self):
            config.show_progress = False
            nfiles = 200
            nsamples = 100000
            datadir = makeManyFiles(nfiles=nfiles, nsamples=nsamples)
            # stand-in for the missing select_files helper:
            filenames = [pjoin(datadir, fn) for fn in os.listdir(datadir)]
            cachefilename = pjoin(datadir, '_cache_')
            pile = Pile(filenames, cachefilename)
            s = 0
            for traces in pile.chopper(tmin=None, tmax=None, tinc=1234.):  # tpad=10.
                for tr in traces:
                    s += num.sum(tr.ydata)
            os.unlink(cachefilename)
            shutil.rmtree(datadir)
            assert s == nfiles*nsamples

    unittest.main()
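
A usage sketch for Pile (assuming a directory of MiniSEED files; as in the modules above, this relies on implicit relative imports):

import os
from os.path import join as pjoin
import pile

datadir = '/path/to/data'
filenames = [pjoin(datadir, fn) for fn in os.listdir(datadir)]
p = pile.Pile(filenames, cachefilename=pjoin(datadir, '_cache_'))
print p
for traces in p.chopper(tinc=3600., tpad=10.):   # hour-long windows, 10 s padding
    for tr in traces:
        print tr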

+259 -0  pyrocko/sac.py

@@ -0,0 +1,259 @@
'''SAC IO library for Python'''

import struct, sys, logging, math, time
from calendar import timegm
from time import gmtime
import numpy as num


class SacError(Exception):
    pass


class SacFile:
    nbytes_header = 632
    header_num_format = {'little': '<70f40i', 'big': '>70f40i'}

    header_keys = '''
delta depmin depmax scale odelta b e o a internal0 t0 t1 t2 t3 t4 t5 t6 t7 t8 t9
f resp0 resp1 resp2 resp3 resp4 resp5 resp6 resp7 resp8 resp9 stla stlo stel
stdp evla evlo evel evdp mag user0 user1 user2 user3 user4 user5 user6 user7
user8 user9 dist az baz gcarc internal1 internal2 depmen cmpaz cmpinc xminimum
xmaximum yminimum ymaximum unused0 unused1 unused2 unused3 unused4 unused5
unused6 nzyear nzjday nzhour nzmin nzsec nzmsec nvhdr norid nevid npts internal3
nwfid nxsize nysize unused7 iftype idep iztype unused8 iinst istreg ievreg
ievtyp iqual isynth imagtyp imagsrc unused9 unused10 unused11 unused12 unused13
unused14 unused15 unused16 leven lpspol lovrok lcalda unused17 kstnm kevnm khole
ko ka kt0 kt1 kt2 kt3 kt4 kt5 kt6 kt7 kt8 kt9 kf kuser0 kuser1 kuser2 kcmpnm
knetwk kdatrd kinst
'''.split()

    header_enum_symbols = '''
itime irlim iamph ixy iunkn idisp ivel iacc ib iday io ia it0 it1 it2 it3 it4
it5 it6 it7 it8 it9 iradnv itannv iradev itanev inorth ieast ihorza idown iup
illlbb iwwsn1 iwwsn2 ihglp isro inucl ipren ipostn iquake ipreq ipostq ichem
iother igood iglch idrop ilowsn irldta ivolts ixyz imb ims iml imw imd imx ineic
ipde iisc ireb iusgs ibrk icaltech illnl ievloc ijsop iuser iunknown iqb iqb1
iqb2 iqbx iqmt ieq ieq1 ieq2 ime iex inu inc io_ il ir it iu
'''.split()

    header_num2name = dict([(a+1, b) for (a, b) in enumerate(header_enum_symbols)])
    header_name2num = dict([(b, a+1) for (a, b) in enumerate(header_enum_symbols)])
    header_types = 'f'*70 + 'i'*35 + 'l'*5 + 'k'*23
    undefined_value = {'f': -12345.0, 'i': -12345, 'l': None, 'k': '-12345'}
    ldefaults = {'leven': 1, 'lpspol': 0, 'lovrok': 1, 'lcalda': 1, 'unused17': 0}
    t_lookup = dict(zip(header_keys, header_types))
    u_lookup = dict([(k, undefined_value[t_lookup[k]]) for k in header_keys])

    def ndatablocks(self):
        '''Get number of data blocks for this file's type.'''
        nblocks = {'itime': 1, 'irlim': 2, 'iamph': 2, 'ixy': 2, 'ixyz': 3}[
            SacFile.header_num2name[self.iftype]]
        if nblocks == 1 and not self.leven:
            nblocks = 2  # not sure about this...
        return nblocks

    def val_or_none(self, k, v):
        '''Replace SAC undef flags with None.'''
        if SacFile.u_lookup[k] == v:
            return None
        else:
            return v

    def get_ref_time(self):
        '''Get reference time as standard Unix timestamp.'''
        if None in (self.nzyear, self.nzjday, self.nzhour, self.nzmin,
                    self.nzsec, self.nzmsec):
            raise SacError('Not all header values for reference time are set.')
        return timegm((self.nzyear, 1, self.nzjday, self.nzhour, self.nzmin,
                       self.nzsec)) + self.nzmsec/1000.

    def set_ref_time(self, timestamp):
        '''Set all header values defining reference time based on standard Unix timestamp.'''
        secs = math.floor(timestamp)
        msec = int(round((timestamp-secs)*1000.))
        if msec == 1000:
            secs += 1
            msec = 0
        t = gmtime(secs)
        self.nzyear, self.nzjday, self.nzhour, self.nzmin, self.nzsec = \
            t[0], t[7], t[3], t[4], t[5]
        self.nzmsec = msec

    def val_for_file(self, k, v):
        '''Convert attribute value to the form required when writing it to the SAC file.'''
        t = SacFile.t_lookup[k]
        if v is None:
            if t == 'l':
                return SacFile.ldefaults[k]
            v = SacFile.u_lookup[k]
        if t == 'f':
            return float(v)
        elif t == 'i':
            return int(v)
        elif t == 'l':
            if v:
                return 1
            return 0
        elif t == 'k':
            l = 8
            if k == 'kevnm':
                l = 16  # only this header value has a different length
            return v.ljust(l)[:l]

    def __init__(self, *args, **kwargs):
        if args:
            self.read(*args, **kwargs)
        else:
            self.clear()

    def clear(self):
        '''Empty file record.'''
        for k in SacFile.header_keys:
            self.__dict__[k] = None

        # set the required attributes
        self.nvhdr = 6
        self.iftype = SacFile.header_name2num['itime']
        self.leven = True
        self.delta = 1.0
        self.npts = 0
        self.b = 0.0
        self.e = 0.0
        self.data = [num.arange(0, dtype=num.float32)]

    def check(self):
        '''Check the required header variables to have reasonable values.'''
        if self.iftype not in [SacFile.header_name2num[x]
                               for x in ('itime', 'irlim', 'iamph', 'ixy', 'ixyz')]:
            raise SacError('Unknown SAC file type: %i.' % self.iftype)
        if self.nvhdr < 1 or 20 < self.nvhdr:
            raise SacError('Unreasonable SAC header version number found.')
        if self.npts < 0:
            raise SacError('Negative number of samples specified in NPTS header.')
        if self.leven not in (0, 1):
            raise SacError('Header value LEVEN must be either 0 or 1.')
        if self.leven and self.delta <= 0.0:
            raise SacError('Header value DELTA should be positive for evenly spaced samples')
        if self.b > self.e:
            raise SacError('Beginning value of independent variable greater than its ending value.')
        if self.nvhdr != 6:
            logging.warn('This module has only been tested with SAC header version 6. '
                         'This file has header version %i. It might still work though...' % self.nvhdr)

    def read(self, filename, get_data=True, byte_sex='try'):
        '''Read SAC file.

        filename -- Name of SAC file.
        get_data -- If True, the data is read, otherwise only read headers.
        byte_sex -- Endianness: 'try', 'little' or 'big'
        '''

        nbh = SacFile.nbytes_header

        # read in all data
        f = open(filename, 'rb')
        if get_data:
            filedata = f.read()
        else:
            filedata = f.read(nbh)
        f.close()

        if len(filedata) < nbh:
            raise SacError('File too short to be a SAC file.')

        # possibly try out different endiannesses
        if byte_sex == 'try':
            sexes = ('little', 'big')
        else:
            sexes = (byte_sex,)

        for isex, sex in enumerate(sexes):
            format = SacFile.header_num_format[sex]
            nbn = struct.calcsize(format)
            hv = list(struct.unpack(format, filedata[:nbn]))

            strings = filedata[nbn:nbh]
            hv.append(strings[:8].rstrip())
            hv.append(strings[8:24].rstrip())
            for i in xrange(len(strings[24:])/8):
                hv.append(strings[24+i*8:24+(i+1)*8].rstrip())

            self.header_vals = hv
            for k, v in zip(SacFile.header_keys, self.header_vals):
                self.__dict__[k] = self.val_or_none(k, v)
            self.data = []

            try:
                self.check()
                break
            except SacError, e:
                if isex == len(sexes)-1:
                    raise e

        # possibly get data
        if get_data:
            nblocks = self.ndatablocks()
            nbb = self.npts*4  # word length is always 4 bytes in SAC files
            for iblock in range(nblocks):
                if len(filedata) < nbh+(iblock+1)*nbb:
                    raise SacError('File is incomplete.')
                self.data.append(num.fromstring(
                    filedata[nbh+iblock*nbb:nbh+(iblock+1)*nbb], dtype=num.float32))

            if len(filedata) > nbh+nblocks*nbb:
                logging.warn('Unused data at end of file.')

    def write(self, filename, byte_sex='little'):
        '''Write SAC file.'''

        self.check()

        # create header data
        format = SacFile.header_num_format[byte_sex]
        nbn = struct.calcsize(format)
        numerical_values = []
        string_values = []
        for k in SacFile.header_keys:
            v = self.__dict__[k]
            vv = self.val_for_file(k, v)
            if SacFile.t_lookup[k] == 'k':
                string_values.append(vv)
            else:
                numerical_values.append(vv)

        header_data = struct.pack(format, *numerical_values)
        header_data += ''.join(string_values)

        # check that enough data is available
        nblocks = self.ndatablocks()
        if len(self.data) != nblocks:
            raise SacError('Need %i data blocks for file type %s.'
                           % (nblocks, SacFile.header_num2name[self.iftype]))

        for fdata in self.data:
            if len(fdata) != self.npts:
                raise SacError('Data length (%i) does not match NPTS header value (%i)'
                               % (len(fdata), self.npts))

        # dump data to file
        f = open(filename, 'wb')
        f.write(header_data)
        for fdata in self.data:
            f.write(fdata.astype(num.float32).tostring())
        f.close()

    def __str__(self):
        s = ''
        for k in SacFile.header_keys:
            v = self.__dict__[k]
            if v is not None:
                s += '%s: %s\n' % (k, v)
        return s


if __name__ == "__main__":
    print SacFile(sys.argv[1])
    s = SacFile()
    fn = '/tmp/x.sac'
    secs = timegm((2009, 2, 19, 8, 50, 0)) + 0.1
    s.set_ref_time(secs)
    s.write(fn)
    s2 = SacFile(fn)
    assert s2.nzjday == 50
    assert s2.nzmsec == 100
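
A short sketch of the undefined-value convention implemented above (unset headers read back as None and are written to file as the SAC undef flags):

import sac

s = sac.SacFile()       # fresh record: most headers are None ('undefined')
print s.delta, s.npts   # required headers get defaults: 1.0 and 0
print s.stla            # unset -> None; written to file as -12345.0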

+224 -0  pyrocko/trace.py

@@ -0,0 +1,224 @@
import copy
import time

import numpy as num
from scipy import signal

import util
from util import reuse, decimate, decitab, UnavailableDecimation


def t2ind(t, deltat):
    # time offset -> sample index helper (assumed here; used by chop() below
    # and by pile.py)
    return int(round(t/deltat))


class NoData(Exception):
    pass


class Trace(object):

    def __init__(self, network='', station='STA', location='', channel='',
                 tmin=0., tmax=None, deltat=1., ydata=None, mtime=None, meta=None):

        if mtime is None:
            mtime = time.time()

        self.network, self.station, self.location, self.channel = \
            [reuse(x) for x in (network, station, location, channel)]

        self.tmin = tmin
        self.deltat = deltat
        if tmax is None:
            if ydata is not None:
                self.tmax = self.tmin + (ydata.size-1)*self.deltat
            else:
                raise Exception('fixme: trace must be created with tmax or ydata')
        else:
            self.tmax = tmax
        self.meta = meta
        self.ydata = ydata
        self.mtime = mtime
        self.update_ids()

    def __eq__(self, other):
        return (self.network == other.network and
                self.station == other.station and
                self.location == other.location and
                self.channel == other.channel and
                self.deltat == other.deltat and
                abs(self.tmin-other.tmin) < self.deltat*0.001 and
                abs(self.tmax-other.tmax) < self.deltat*0.001 and
                num.all(self.ydata == other.ydata))

    def is_relevant(self, tmin, tmax, selector=None):
        return not (tmax <= self.tmin or self.tmax < tmin) and \
            (selector is None or selector(self))

    def update_ids(self):
        self.full_id = (self.network, self.station, self.location, self.channel, self.tmin)
        self.nslc_id = reuse((self.network, self.station, self.location, self.channel))

    def set_mtime(self, mtime):
        self.mtime = mtime

    def get_xdata(self):
        if self.ydata is None:
            raise NoData()
        return self.tmin + num.arange(len(self.ydata), dtype=num.float64) * self.deltat

    def get_ydata(self):
        if self.ydata is None:
            raise NoData()
        return self.ydata

    def drop_data(self):
        self.ydata = None

    def copy(self, data=True):
        tracecopy = copy.copy(self)
        if data:
            tracecopy.ydata = self.ydata.copy()
        tracecopy.meta = copy.deepcopy(self.meta)
        return tracecopy

    def chop(self, tmin, tmax, inplace=True):
        if tmax <= self.tmin or self.tmax < tmin:
            raise NoData()
        ibeg = max(0, t2ind(tmin-self.tmin, self.deltat))
        iend = min(len(self.ydata), t2ind(tmax-self.tmin, self.deltat))
        obj = self
        if not inplace:
            obj = self.copy(data=False)
        obj.ydata = self.ydata[ibeg:iend].copy()
        obj.tmin = obj.tmin+ibeg*obj.deltat
        obj.tmax = obj.tmin+(len(obj.ydata)-1)*obj.deltat
        return obj

    def downsample(self, ndecimate):
        data = self.ydata.astype(num.float64)
        data -= num.mean(data)
        self.ydata = decimate(data, ndecimate, ftype='fir')
        self.deltat = reuse(self.deltat*ndecimate)
        self.tmax = self.tmin+(len(self.ydata)-1)*self.deltat

    def downsample_to(self, deltat):
        ratio = deltat/self.deltat
        rratio = round(ratio)
        if abs(rratio - ratio) > 0.0001:
            raise UnavailableDecimation('ratio = %g' % ratio)
        deci_seq = decitab(int(rratio))
        for ndecimate in deci_seq:
            if ndecimate != 1:
                self.downsample(ndecimate)

    def lowpass(self, order, corner):
        (b, a) = signal.butter(order, corner*2.0*self.deltat, btype='low')
        data = self.ydata.astype(num.float64)
        data -= num.mean(data)
        self.ydata = signal.lfilter(b, a, data)

    def highpass(self, order, corner):
        (b, a) = signal.butter(order, corner*2.0*self.deltat, btype='high')
        data = self.ydata.astype(num.float64)
        data -= num.mean(data)
        self.ydata = signal.lfilter(b, a, data)

    def bandpass(self, order, corner_hp, corner_lp):
        (b, a) = signal.butter(order, [corner*2.0*self.deltat
                                       for corner in (corner_hp, corner_lp)], btype='band')
        data = self.ydata.astype(num.float64)
        data -= num.mean(data)
        self.ydata = signal.lfilter(b, a, data)

    def bandpass_fft(self, corner_hp, corner_lp):
        data = self.ydata.astype(num.float64)
        n = len(data)
        fdata = num.fft.rfft(data)
        nf = len(fdata)
        df = 1./(n*self.deltat)
        freqs = num.arange(nf)*df
        fdata *= num.logical_and(corner_hp < freqs, freqs < corner_lp)
        data = num.fft.irfft(fdata, n)
        assert len(data) == n
        self.ydata = data

    def shift(self, tshift):
        self.tmin += tshift
        self.tmax += tshift

    def sta_lta_centered(self, tshort, tlong, quad=True):
        nshort = int(round(tshort/self.deltat))
        nlong = int(round(tlong/self.deltat))
        if quad:
            sqrdata = self.ydata**2
        else:
            sqrdata = self.ydata
        # moving_avg is expected to be provided elsewhere; it is not part of
        # this commit
        mavg_short = moving_avg(sqrdata, nshort)
        mavg_long = moving_avg(sqrdata, nlong)
        self.ydata = num.maximum((mavg_short/mavg_long - 1.) * float(nshort)/float(nlong), 0.0)

    def peaks(self, threshold, tsearch):
        y = self.ydata
        above = num.where(y > threshold, 1, 0)
        itrig_positions = num.nonzero((above[1:]-above[:-1]) > 0)[0]
        tpeaks = []
        apeaks = []
        for itrig_pos in itrig_positions:
            ibeg = max(0, int(itrig_pos - 0.5*tsearch/self.deltat))
            iend = min(len(self.ydata)-1, int(itrig_pos + 0.5*tsearch/self.deltat))
            ipeak = num.argmax(y[ibeg:iend])
            tpeak = self.tmin + (ipeak+ibeg)*self.deltat
            apeak = y[ibeg+ipeak]
            tpeaks.append(tpeak)
            apeaks.append(apeak)
        return tpeaks, apeaks

    def transfer(self, tfade, freqlimits, transfer_function=None, cut_off_fading=True):
        '''Return new trace with transfer function applied.

        tfade -- rise/fall time in seconds of taper applied in time domain at both ends of trace.
        freqlimits -- 4-tuple with corner frequencies in Hz.
        transfer_function -- FrequencyResponse object; must provide a method 'evaluate(freqs)',
            which returns the transfer function coefficients at the frequencies 'freqs'.
        cut_off_fading -- cut off rise/fall interval in output trace.

        (FrequencyResponse, TraceTooShort, nextpow2, costaper and snapper are
        expected to be provided elsewhere; they are not part of this commit.)
        '''

        if transfer_function is None:
            transfer_function = FrequencyResponse()

        if self.tmax - self.tmin <= tfade*2.:
            raise TraceTooShort('trace too short for fading length setting. '
                                'trace length = %g, fading length = %g'
                                % (self.tmax-self.tmin, tfade))

        ndata = self.ydata.size
        ntrans = nextpow2(ndata*1.2)
        coefs = self._get_tapered_coefs(ntrans, freqlimits, transfer_function)

        data = self.ydata
        data_pad = num.zeros(ntrans, dtype=num.float)
        data_pad[:ndata] = data - data.mean()
        data_pad[:ndata] *= costaper(0., tfade, self.deltat*(ndata-1)-tfade,
                                     self.deltat*ndata, ndata, self.deltat)
        fdata = num.fft.rfft(data_pad)
        fdata *= coefs
        ddata = num.fft.irfft(fdata)

        output = self.copy()
        output.ydata = ddata[:ndata]
        if cut_off_fading:
            output = output.chop(output.tmin+tfade, output.tmax-tfade)
        else:
            output.ydata = output.ydata.copy()
        return output

    def _get_tapered_coefs(self, ntrans, freqlimits, transfer_function):
        deltaf = 1./(self.deltat*ntrans)
        nfreqs = ntrans/2 + 1
        transfer = num.ones(nfreqs, dtype=num.complex)
        hi = snapper(nfreqs, deltaf)
        a, b, c, d = freqlimits
        freqs = num.arange(hi(d)-hi(a), dtype=num.float)*deltaf + hi(a)*deltaf
        transfer[hi(a):hi(d)] = transfer_function.evaluate(freqs)
        tapered_transfer = costaper(a, b, c, d, nfreqs, deltaf)*transfer
        return tapered_transfer

    def fill_template(self, template):
        params = dict(zip(('network', 'station', 'location', 'channel'), self.nslc_id))
        params['tmin'] = util.gmctime_fn(self.tmin)
        params['tmax'] = util.gmctime_fn(self.tmax)
        return template % params

    def __str__(self):
        s = 'Trace (%s, %s, %s, %s)\n' % self.nslc_id
        s += '  timerange: %s - %s\n' % (util.gmctime(self.tmin), util.gmctime(self.tmax))
        s += '  delta t: %g\n' % self.deltat
        return s
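
A minimal sketch of constructing and filtering a Trace (synthetic data; the filter methods need scipy):

import numpy as num
import trace

tr = trace.Trace(network='XX', station='TEST', channel='BHZ',
                 tmin=0.0, deltat=0.01, ydata=num.random.random(1000))
tr.bandpass(4, 0.5, 10.0)   # 4th-order Butterworth band-pass, 0.5-10 Hz
print tr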

+86 -0  pyrocko/util.py

@@ -0,0 +1,86 @@
import time

from scipy import signal


def decimate(x, q, n=None, ftype='iir', axis=-1):
    """Downsample the signal x by an integer factor q, using an order n filter.

    By default, an order 8 Chebyshev type I filter is used, or a 30 point FIR
    filter with Hamming window if ftype is 'fir'.

    (A port to Python of the GNU Octave function decimate.)

    Inputs:
        x     -- the signal to be downsampled (N-dimensional array)
        q     -- the downsampling factor
        n     -- order of the filter (1 less than the length of the filter for a
                 'fir' filter)
        ftype -- type of the filter; can be 'iir' or 'fir'
        axis  -- the axis along which the filter should be applied

    Outputs:
        y -- the downsampled signal
    """

    if not isinstance(q, int):
        raise ValueError('q should be an integer')

    if n is None:
        if ftype == 'fir':
            n = 30
        else:
            n = 8

    if ftype == 'fir':
        b = signal.firwin(n+1, 1./q, window='hamming')
        y = signal.lfilter(b, 1., x, axis=axis)
    else:
        (b, a) = signal.cheby1(n, 0.05, 0.8/q)
        y = signal.lfilter(b, a, x, axis=axis)

    return y.swapaxes(0, axis)[n/2::q].swapaxes(0, axis)


class UnavailableDecimation(Exception):
    pass


class Glob:
    decitab_nmax = 0
    decitab = {}


def mk_decitab(nmax=100):
    tab = Glob.decitab
    for i in range(1, 10):
        for j in range(1, i+1):
            for k in range(1, j+1):
                for l in range(1, k+1):
                    for m in range(1, l+1):
                        p = i*j*k*l*m
                        if p > nmax:
                            break
                        if p not in tab:
                            tab[p] = (i, j, k, l, m)
                    if i*j*k*l > nmax:
                        break
                if i*j*k > nmax:
                    break
            if i*j > nmax:
                break
        if i > nmax:
            break

    Glob.decitab_nmax = nmax  # remember how far the table has been filled


def decitab(n):
    if n > Glob.decitab_nmax:
        mk_decitab(n*2)
    if n not in Glob.decitab:
        raise UnavailableDecimation('ratio = %g' % n)
    return Glob.decitab[n]


def gmctime(t):
    return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(t))


def gmctime_v(t):
    return time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime(t))


def gmctime_fn(t):
    return time.strftime("%Y-%m-%d_%H-%M-%S", time.gmtime(t))


reuse_store = dict()


def reuse(x):
    if x not in reuse_store:
        reuse_store[x] = x
    return reuse_store[x]
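
A short sketch of the decimation helpers (FIR path; x is synthetic):

import numpy as num
from util import decimate, decitab

x = num.sin(num.arange(1000)*0.01)
y = decimate(x, 5, ftype='fir')   # anti-alias FIR filter, then take every 5th sample
print len(y)                      # roughly len(x)/5
print decitab(20)                 # factorization into stages <= 9, e.g. (5, 2, 2, 1, 1)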

+15 -0  setup.py

@@ -0,0 +1,15 @@
import numpy
from distutils.core import setup, Extension

setup(name='pyrocko',
      include_dirs=[numpy.get_include()],
      version='0.1',
      description='Seismological Processing Unit',
      packages=['pyrocko'],
      ext_modules=[Extension('pyrocko/mseed_ext',
                             include_dirs=['./libmseed'],
                             library_dirs=['./libmseed'],
                             libraries=['mseed'],
                             sources=['pyrocko/mseed_ext.c'])])
