# Copyright (c) 2013, Lafe Conner and Kem Elbrader
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Lafe Conner nor Kem Elbrader nor the names of any other
# copyright holders or contributors, if any, of this software may be used to
# endorse or promote products derived from this software without specific prior
# written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import csv
import time
import sys
import os
from time import mktime
from datetime import datetime

# The csv module wants differently opened files on Python 2 and Python 3,
# so the interpreter's major version is checked once here.
_PY3 = sys.version_info[0] >= 3

# ``raw_input`` only exists on Python 2; Python 3 renamed it to ``input``.
try:
    _prompt = raw_input
except NameError:
    _prompt = input

_PROMPT_TEXT = 'Please specify the file you would like to convert?\n'
_INPUT_DATE_FORMAT = "%m/%d/%Y %I:%M %p"
_OUTPUT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def _open_csv(path, mode):
    """Open *path* ("r" or "w") with the newline handling csv expects."""
    if _PY3:
        # newline="" lets the csv module do its own line-ending handling.
        return open(path, mode, newline="")
    # Python 2: universal newlines for reading, binary for writing.
    return open(path, "rU" if mode == "r" else mode + "b")


def load_csv_reader(input_file=None):
    """Return a ``csv.reader`` over the file to convert.

    input_file -- path of the CSV file; when omitted (the original
                  behaviour) the user is prompted for it on stdin.

    The underlying file stays open for as long as the reader is in use.
    Raises IOError/OSError if the file cannot be opened.
    """
    if input_file is None:
        input_file = _prompt(_PROMPT_TEXT)
    return csv.reader(_open_csv(input_file, "r"))


def read_data(reader):
    """Consume *reader* and return the converted data.

    The first row supplies the collection-set name (first cell) and the
    dataset names (remaining cells); the next two rows are skipped.  Every
    later row is a timestamp followed by one value per dataset.

    As a side effect the "<collection set>-output" directory is created.

    Returns a dict mapping each output file path to a list of
    ``(date, value)`` tuples, beginning with a header tuple.
    Raises StopIteration if the input has fewer than three rows.
    """
    data = {}
    first_row = next(reader)
    next(reader)  # second header row is not used
    next(reader)  # third header row is not used
    collection_set_name = first_row[0]
    output_directory_name = create_output_dir(collection_set_name)
    dataset_names = first_row[1:]
    dataset_filenames = initialize_dataset_filenames(
        dataset_names, output_directory_name, collection_set_name)
    initialize_dataset(data, dataset_filenames)
    process_data(reader, dataset_filenames, data)
    return data


def process_data(reader, dataset_filenames, data):
    """Feed every remaining row of *reader* to ``process_row``."""
    for row in reader:
        process_row(row, dataset_filenames, data)


def process_row(row, dataset_filenames, data):
    """Append one ``(date, value)`` tuple per dataset for *row*.

    ``row[0]`` is the timestamp; the following cells are paired, in order,
    with *dataset_filenames*.  Rows with no cells at all (blank lines in
    the input) are skipped instead of raising IndexError.
    """
    if not row:
        return
    date = get_row_date(row)
    for filename, value in zip(dataset_filenames, row[1:]):
        data[filename].append((date, get_normalized_value(value)))


def initialize_dataset(data, dataset_filenames):
    """Start each dataset in *data* with the output header row."""
    for name in dataset_filenames:
        data[name] = [("LocalDateTime", "DataValue")]


def initialize_dataset_filenames(dataset_names, output_directory_name,
                                 collection_set_name):
    """Return one output path per dataset name, in the same order.

    Each path has the form "<output dir>/<collection set>-<dataset>.csv".
    """
    return [
        "%s/%s-%s.csv" % (output_directory_name, collection_set_name,
                          dataset_name)
        for dataset_name in dataset_names
    ]


def get_normalized_value(value):
    """Return *value*, with the "***" placeholder replaced by "-999"."""
    if value == "***":
        value = "-999"  # This replaces *** with -999
    return value


def create_output_dir(collection_set_name):
    """Create "<collection_set_name>-output" if missing; return its name."""
    output_directory_name = "%s-output" % collection_set_name
    if not os.path.exists(output_directory_name):
        os.mkdir(output_directory_name)
    return output_directory_name


def get_row_date(row):
    """Return ``row[0]`` reformatted as "YYYY-MM-DD HH:MM:SS".

    The input must look like "MM/DD/YYYY HH:MM AM|PM"; ValueError is raised
    otherwise.  The value is parsed straight into a naive datetime: going
    through ``mktime``/``fromtimestamp`` would push it through the local
    time zone and shift wall-clock times that fall in a DST gap.
    """
    parsed = datetime.strptime(row[0], _INPUT_DATE_FORMAT)
    return parsed.strftime(_OUTPUT_DATE_FORMAT)


def write_data(data):
    """Write each dataset in *data* to its file and report it on stdout.

    data -- dict mapping output file path to an iterable of rows.
    """
    for file_name, rows in data.items():
        # ``with`` closes (and therefore flushes) every output file.
        with _open_csv(file_name, "w") as output:
            csv.writer(output).writerows(rows)
        print("Successfully converted the file named %s." % file_name)


def main():
    """Prompt for an input file, convert it and write the output files."""
    # The name is kept here so that the error message below can report it.
    input_file = _prompt(_PROMPT_TEXT)
    reader = load_csv_reader(input_file)
    try:
        data = read_data(reader)
        write_data(data)
    except csv.Error as e:
        sys.exit("file %s, line %d: %s" % (input_file, reader.line_num, e))


if __name__ == "__main__":
    main()