Second to last row?

Question

0 votes

Obs_Node.txt.zip

I've created a function to read output files of varying length. However, I want the function to read the data only up to the second-to-last line of the output file (see attached). The output file has END written at the bottom of the text, which I want to ignore. How would I change the script so that it essentially reads Inf-1 (see line 19 of code)?

function [time,h,th,etaFlux,Conc] = importfile(filename, startRow, endRow)
%IMPORTFILE Import numeric data from a text file as column vectors.
%   [TIME,H,TH,ETAFLUX,CONC] = IMPORTFILE(FILENAME)
%   Reads data from text file FILENAME starting at row 8 and continuing to
%   the end of the file.
%
%   [TIME,H,TH,ETAFLUX,CONC] = IMPORTFILE(FILENAME, STARTROW)
%   Reads from row STARTROW to the end of the file.
%
%   [TIME,H,TH,ETAFLUX,CONC] = IMPORTFILE(FILENAME, STARTROW, ENDROW)
%   Reads data from rows STARTROW through ENDROW of text file FILENAME.
%   STARTROW and ENDROW may be vectors of equal length describing several
%   row blocks, which are concatenated in order.
%
%   When the (last) ENDROW is Inf, i.e. the file is read to its end,
%   trailing rows that contain no numeric value in any of the five columns
%   (for example a closing "END" marker line) are dropped from the outputs.
%   Rows selected with an explicit finite ENDROW are returned unchanged,
%   with non-numeric entries reported as NaN.
%
%   An error is raised if FILENAME cannot be opened.
%
% Example:
%   [time,h,th,etaFlux,Conc] = importfile('Obs_Node.txt',8, 117660);
%
%    See also TEXTSCAN.
% Originally auto-generated by MATLAB on 2022/10/26 09:45:27
%% Initialize variables.
% Default each argument independently so that a caller-supplied STARTROW is
% honoured even when ENDROW is omitted.
if nargin < 2 || isempty(startRow)
    startRow = 8;
end
if nargin < 3 || isempty(endRow)
    % One Inf per block keeps endRow(block) valid for vector startRow.
    endRow = Inf(size(startRow));
end
%% Read columns of data as text:
% For more information, see the TEXTSCAN documentation.
formatSpec = '%16s%12s%8s%12s%s%[^\n\r]';
%% Open the text file.
fileID = fopen(filename,'r');
if fileID == -1
    error('importfile:cannotOpenFile', 'Unable to open file "%s".', char(filename));
end
% Close the file when the function exits, including on error.
closeFile = onCleanup(@() fclose(fileID));
%% Read columns of data according to the format.
% This call is based on the structure of the file used to generate this code. If an error occurs for a different file, try regenerating the code from the Import Tool.
% First skip the rows before startRow, then read the requested rows.
textscan(fileID, '%[^\n\r]', startRow(1)-1, 'WhiteSpace', '', 'ReturnOnError', false);
dataArray = textscan(fileID, formatSpec, endRow(1)-startRow(1)+1, 'Delimiter', '', 'WhiteSpace', '', 'TextType', 'string', 'ReturnOnError', false, 'EndOfLine', '\r\n');
for block=2:length(startRow)
    frewind(fileID);
    textscan(fileID, '%[^\n\r]', startRow(block)-1, 'WhiteSpace', '', 'ReturnOnError', false);
    dataArrayBlock = textscan(fileID, formatSpec, endRow(block)-startRow(block)+1, 'Delimiter', '', 'WhiteSpace', '', 'TextType', 'string', 'ReturnOnError', false, 'EndOfLine', '\r\n');
    for col=1:length(dataArray)
        dataArray{col} = [dataArray{col};dataArrayBlock{col}];
    end
end
%% Convert the contents of columns containing numeric text to numbers.
% Replace non-numeric text with NaN.
raw = repmat({''},length(dataArray{1}),length(dataArray)-1);
for col=1:length(dataArray)-1
    raw(1:length(dataArray{col}),col) = mat2cell(dataArray{col}, ones(length(dataArray{col}), 1));
end
numericData = NaN(size(dataArray{1},1),size(dataArray,2));
% Regular expression to detect and remove non-numeric prefixes and suffixes
% (loop-invariant, so it is built once).
regexstr = '(?<prefix>.*?)(?<numbers>([-]*(\d+[\,]*)+[\.]{0,1}\d*[eEdD]{0,1}[-+]*\d*[i]{0,1})|([-]*(\d+[\,]*)*[\.]{1,1}\d+[eEdD]{0,1}[-+]*\d*[i]{0,1}))(?<suffix>.*)';
thousandsRegExp = '^[-/+]*\d+?(\,\d{3})*\.{0,1}\d*$';
for col=1:5
    % Converts text in the input cell array to numbers. Replaced non-numeric text with NaN.
    rawData = dataArray{col};
    for row=1:size(rawData, 1)
        try
            % When the text holds no number, result is empty and the field
            % access below throws; the catch keeps the original text.
            result = regexp(rawData(row), regexstr, 'names');
            numbers = result.numbers;

            % Detected commas in non-thousand locations.
            invalidThousandsSeparator = false;
            if contains(numbers, ',')
                if isempty(regexp(numbers, thousandsRegExp, 'once'))
                    numbers = NaN;
                    invalidThousandsSeparator = true;
                end
            end
            % Convert numeric text to numbers.
            if ~invalidThousandsSeparator
                numbers = textscan(char(strrep(numbers, ',', '')), '%f');
                numericData(row, col) = numbers{1};
                raw{row, col} = numbers{1};
            end
        catch
            raw{row, col} = rawData{row};
        end
    end
end
%% Replace non-numeric cells with NaN
R = cellfun(@(x) ~isnumeric(x) && ~islogical(x),raw); % Find non-numeric cells
raw(R) = {NaN}; % Replace non-numeric cells
%% Drop trailing non-data rows (e.g. a closing "END" line)
% Only done when reading to end of file; explicitly bounded row ranges are
% returned exactly as requested.
if isinf(endRow(end))
    isNaNCell = cellfun(@(x) isnumeric(x) && isscalar(x) && isnan(x), raw);
    lastDataRow = find(~all(isNaNCell, 2), 1, 'last');
    if isempty(lastDataRow)
        lastDataRow = 0;
    end
    raw(lastDataRow+1:end, :) = [];
end
%% Allocate imported array to column variable names
time = cell2mat(raw(:, 1));
h = cell2mat(raw(:, 2));
th = cell2mat(raw(:, 3));
etaFlux = cell2mat(raw(:, 4));
Conc = cell2mat(raw(:, 5));
end

0 Comments
Show -2 older comments Hide -2 older comments

Sign in to comment.

Sign in to answer this question.

Follow Question

Answer 1

Voss on 26 Oct 2022

Open in MATLAB Online

0 votes

Obs_Node.txt.zip

% Extract Obs_Node.txt from the attached archive.
% (Skip this step if the txt file is already on disk.)
unzip('Obs_Node.txt.zip');
% Load the file into a table, skipping the 10 header lines:
t = readtable('Obs_Node.txt', 'NumHeaderLines', 10);
% Delete the final row, which holds only NaNs:
t(height(t), :) = [];
% Display the first and last few rows stacked together as a sanity check:
vertcat(head(t), tail(t))
ans = 16×5 table
     time      h     theta       Flux      Conc
    ______    ___    ______    ________    ____

     0.001    -25    0.2827      -779.2     0  
     0.002    -25    0.2827      -321.5     0  
     0.003    -25    0.2827      -159.3     0  
     0.004    -25    0.2827      -91.51     0  
    0.0053    -25    0.2827      -52.03     0  
     0.007    -25    0.2827      -31.68     0  
    0.0092    -25    0.2827       -21.3     0  
     0.012    -25    0.2827      -14.35     0  
     18258    -25    0.2827    -0.04248     0  
     18258    -25    0.2827      -0.042     0  
     18258    -25    0.2827      -0.042     0  
     18259    -25    0.2827    -0.04153     0  
     18260    -25    0.2827    -0.04115     0  
     18261    -25    0.2827    -0.04067     0  
     18262    -25    0.2827    -0.04067     0  
     18262    -25    0.2827    -0.04019     0  

4 Comments
Show 2 older comments Hide 2 older comments

Voss on 27 Oct 2022

Open in MATLAB Online

Obs_Node.out.zip

In that case, specify 'FileType','text' in the call to readtable:

% Extract Obs_Node.out from the attached archive.
% (Skip this step if the out file is already on disk.)
unzip('Obs_Node.out.zip');
% Load the file into a table; 'FileType','text' is passed because of the
% .out extension, and the 10 header lines are skipped:
t = readtable('Obs_Node.out', 'FileType', 'text', 'NumHeaderLines', 10);
% Delete the final row, which holds only NaNs:
t(height(t), :) = [];
% Display the first and last few rows stacked together as a sanity check:
vertcat(head(t), tail(t))
ans = 16×5 table
     time      h     theta       Flux      Conc
    ______    ___    ______    ________    ____

     0.001    -25    0.2827      -779.2     0  
     0.002    -25    0.2827      -321.5     0  
     0.003    -25    0.2827      -159.3     0  
     0.004    -25    0.2827      -91.51     0  
    0.0053    -25    0.2827      -52.03     0  
     0.007    -25    0.2827      -31.68     0  
    0.0092    -25    0.2827       -21.3     0  
     0.012    -25    0.2827      -14.35     0  
     18258    -25    0.2827    -0.04248     0  
     18258    -25    0.2827      -0.042     0  
     18258    -25    0.2827      -0.042     0  
     18259    -25    0.2827    -0.04153     0  
     18260    -25    0.2827    -0.04115     0  
     18261    -25    0.2827    -0.04067     0  
     18262    -25    0.2827    -0.04067     0  
     18262    -25    0.2827    -0.04019     0  

Wesser on 27 Oct 2022

Perfect. Thank you!

Voss on 27 Oct 2022

You're welcome! Any questions, let me know. Otherwise, please "Accept This Answer". Thanks!

Sign in to comment.

Answer 2

Mathieu NOE on 26 Oct 2022

Open in MATLAB Online

0 votes

hello

I have nothing personally against your code, but it's slow and impractical: you have this issue with the "end" at the end of your input file, the user must enter the startRow value, and now you want to remove the last line of your data.

Try this code instead: it is much faster and does not have the limitations of the posted code - so simple!

% Timing reported by the answer's author:
%   your code       : Elapsed time is 100.540276 seconds.
%   my code (below) : Elapsed time is 2.132095 seconds.
tic
[outdata, head] = readclm('Obs_Node.txt');
% Column names, split from the last header line (for fun or future use)
variables_names = split(strtrim(head(size(head, 1), :)));
%% Hand each column of the imported matrix to its named variable
time    = outdata(:, 1);
h       = outdata(:, 2);
th      = outdata(:, 3);
etaFlux = outdata(:, 4);
Conc    = outdata(:, 5);
toc
function  [outdata,head] = readclm(filename,nclm,skip,formt)
% READCLM Reads numerical data from a text file into a matrix.
%	Text file can begin with a header or comment block.
%	[DATA,HEAD] = READCLM(FILENAME,NCLM,SKIP,FORMAT)
%	Opens file FILENAME, skips the first SKIP lines and any
%	line beginning with the comment character '%'.
%	Then reads the following lines into a character matrix HEAD
%	(one row per line, blank-padded, line terminators removed)
%	until the first line with numerical data is encountered
%	(that is until first non-empty output of SSCANF).
%	Then reads the rest of the file into a numerical matrix
%	DATA in a format FORMAT with number of columns equal
%	to number of columns of the first data line or specified
%	by number NCLM. Reading stops at the first text that does
%	not match FORMAT (e.g. a trailing "end" line). If the
%	number of values is not a multiple of the column count,
%	the last row is padded with NaN.
%
%	READCLM(FILENAME) reads data from a text file FILENAME,
%	skipping only commented lines. It determines number of
%	columns by the length of the first data line and uses
%	the floating point format '%g';
%
%	If the file contains no numerical line at all, DATA is
%	empty and HEAD holds every non-skipped line.
%	An error is raised if FILENAME cannot be opened.
%
%	READCLM uses FGETS to read the first lines and 	FSCANF
%	for reading data.
 % Defaults and  parameters ..............................
formt_dflt = '%g';  % Default format for fscanf
addn = nan;         % Number to fill the end if necessary
 % Handle input ..........................................
if nargin<1, error('  File name is undefined'); end
if nargin<4 || isempty(formt), formt = formt_dflt; end
if nargin<3 || isempty(skip), skip = 0; end
if nargin<2 || isempty(nclm), nclm = 0; end
 % Open file ............................
[fid,msg] = fopen(filename);
if fid<0
  % Fail loudly: returning here would leave the outputs unassigned.
  error('readclm:cannotOpenFile', 'Cannot open "%s": %s', char(filename), msg);
end
closer = onCleanup(@() fclose(fid));  % Close file on any exit path
 % Find header and first  data line ......................
jl = 0;
head = ' ';     % Dummy first row, removed after the loop
out1 = [];
while true      % Add lines to header.....
  s = fgets(fid);           % Get next line
  if ~ischar(s)             % End of file: no data line exists
    out1 = [];
    break
  end
  jl = jl+1;
  is_skip = jl<=skip || s(1)=='%';
  out1 = sscanf(s,formt);   % Try to read this line
  if ~isempty(out1) && ~is_skip
    break                   % First numerical line found
  end
   % Unreadable by SSCANF and not skipped: add to header
  if ~is_skip
     % Strip the terminator (LF, CRLF or none on the last line)
    head = char(head,regexprep(s,'[\r\n]+$',''));
  end
end
head = head(2:size(head,1),:);
 % Determine number of columns if not specified
out1 = out1(:);
l1 = length(out1);
if l1==0        % Nothing numerical in the file
  outdata = zeros(0,max(nclm,0));
  return
end
if ~nclm, nclm = l1; end
 % Read the rest of the file ..............................
 % Read as one flat vector and reshape, so that an incomplete
 % last row is always padded with NaN as documented.
rest = fscanf(fid,formt);
vals = [out1; rest(:)];
nrows = ceil(length(vals)/nclm);
vals(end+1:nclm*nrows) = addn;
outdata = reshape(vals,nclm,nrows)';
end

1 Comment
Show -1 older comments Hide -1 older comments

Wesser on 27 Oct 2022

I found this function on how to concatenate vectors with different lengths by padding with NaN, but I can't figure out how to apply this to my for loop.

Sign in to comment.

Second to last row?

0 Comments
Show -2 older comments Hide -2 older comments

Answers (2)

4 Comments
Show 2 older comments Hide 2 older comments

1 Comment
Show -1 older comments Hide -1 older comments

Categories

Products

Release

Tags

Community Treasure Hunt

Second to last row?

0 Comments Show -2 older comments Hide -2 older comments

Answers (2)

4 Comments Show 2 older comments Hide 2 older comments

1 Comment Show -1 older comments Hide -1 older comments

Categories

Products

Release

Tags

See Also

Community Treasure Hunt

0 Comments
Show -2 older comments Hide -2 older comments

4 Comments
Show 2 older comments Hide 2 older comments

1 Comment
Show -1 older comments Hide -1 older comments