you need to use ascii code too

2 views (last 30 days)
Antonio-Cristian
Antonio-Cristian on 8 Dec 2023
Edited: DGM on 8 Dec 2023
function [Program_Pr11] = Program_Pr11(Program_Pr11)
% 1 - Încarcă textul din fișierul RTF
fileID = fopen('text.rtf','r'); % Înlocuiește 'text.rtf' cu numele fișierului tău
text = fscanf(fileID,'%c');
fclose(fileID);
% 2 - Determină limba textului (acest exemplu folosește o librărie externă, apelând un serviciu online sau o bibliotecă MATLAB)
detected_language = detectLanguage(text);
% 3 - Numărul de cuvinte și apariția fiecărui cuvânt
words = split(text, ' ');
num_words = numel(words);
word_counts = countOccurrences(words);
% 4 - Numărul de litere și frecvența fiecărui caracter
letters = replace(text, ' ', ''); % Elimină spațiile
num_letters = numel(letters);
letter_counts = countOccurrences(letters);
unique_chars = unique(letters);
% 5 - Verifică și completează dicționarul cu cuvintele din text
dictionary = loadDictionary(); % Încarcă dicționarul existent
missing_words = findMissingWords(words, dictionary);
updated_dictionary = addToDictionary(missing_words, dictionary);
% 6 - Verifică prezența anumitor cuvinte indicate în text
specified_words = {'word1', 'word2', 'word3'}; % Cuvintele de verificat
word_presence = checkWordPresence(specified_words, words);
% 7 - Reprezintă grafic frecvențele de apariție ale caracterelor în text
figure;
bar(unique_chars, letter_counts);
xlabel('Caractere');
ylabel('Frecvență de apariție');
title('Frecvențele de apariție ale caracterelor în text');
% Funcțiile auxiliare
function detected_language = detectLanguage(text)
% Implementează funcția pentru a detecta limba textului
% Returnează limba detectată
% Poate fi necesară utilizarea unor servicii sau biblioteci specializate
% Exemplu: detected_language = MyLanguageDetectionFunction(text);
detected_language = 'Romanian'; % Exemplu: limba detectată este româna
end
function counts = countOccurrences(items)
unique_items = unique(items);
counts = zeros(size(unique_items));
for i = 1:numel(unique_items)
counts(i) = sum(ismember(items, unique_items(i)));
end
end
function dictionary = loadDictionary()
% Implementează funcția pentru a încărca dicționarul
% Returnează dicționarul
dictionary = {'word1', 'word2', 'word3'}; % Exemplu: dicționarul tău
end
function missing_words = findMissingWords(words, dictionary)
missing_words = setdiff(unique(words), dictionary);
end
function updated_dictionary = addToDictionary(new_words, dictionary)
updated_dictionary = [dictionary new_words];
end
function word_presence = checkWordPresence(specific_words, words)
word_presence = ismember(specific_words, words);
end
end
  2 Comments
Dyuman Joshi
Dyuman Joshi on 8 Dec 2023
What's your question?
DGM
DGM on 8 Dec 2023
Besides not actually saying what you want, you haven't included the working text.rtf. Are you aware that text.rtf is not a plain text file? I have to ask, because you're processing it like it's plain text when it's not. Disregarding array orientation errors, you're trying to fill your dictionary with a bunch of markup garbage that's not body text.

Sign in to comment.

Answers (1)

DGM
DGM on 8 Dec 2023
Edited: DGM on 8 Dec 2023
I fixed the improper concatenation and bad inputs to bar()
Program_Pr11('text.rtf.fakeextension.txt')
Of course, the bar labels are unreadable at this figure scale. I can't change that on the forum.
So what are those unique characters?
unique_chars =
'
$*-./0123456789;ABCDFHILNOQRSTWXYZ\_abcdefghijklmnopqrstuvwxy{}'
Let's look at the RTF file. Note that there aren't any uppercase characters or punctuation.
Let's see what the dictionary looks like:
words =
66×1 cell array
{'{\rtf1\ansi\deff3\adeflang1025↵{\fonttbl{\f0\froman\fprq2\fcharset0' }
{'Times' }
{'New' }
{'Roman;}{\f1\froman\fprq2\fcharset2' }
{'Symbol;}{\f2\fswiss\fprq2\fcharset0' }
{'Arial;}{\f3\froman\fprq2\fcharset0' }
{'Liberation' }
{'Serif{\*\falt' }
{'Times' }
{'New' }
{'Roman};}{\f4\fswiss\fprq2\fcharset0' }
{'Liberation' }
{'Sans{\*\falt' }
{'Arial};}{\f5\fnil\fprq2\fcharset0' }
{'WenQuanYi' }
{'Zen' }
{'Hei;}{\f6\fnil\fprq2\fcharset0' }
{'FreeSans;}{\f7\fswiss\fprq0\fcharset128' }
{'FreeSans;}}↵{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;\red255\green143\blue31;\red239\green0\blue0;}↵{\stylesheet{\s0\snext0\nowidctlpar\hyphpar0\cf0\kerning1\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\loch\f3\fs24\lang1033'}
{'Normal;}↵{\s15\sbasedon0\snext16\sb240\sa120\keepn\dbch\af5\dbch\af6\afs28\loch\f4\fs28' }
{'Heading;}↵{\s16\sbasedon0\snext16\sl288\slmult1\sb0\sa140' }
{'Text' }
{'Body;}↵{\s17\sbasedon16\snext17\sl288\slmult1\sb0\sa140\dbch\af7' }
{'List;}↵{\s18\sbasedon0\snext18\sb120\sa120\noline\i\dbch\af7\afs24\ai\fs24' }
{'Caption;}↵{\s19\sbasedon0\snext19\noline\dbch\af7' }
{'Index;}↵}{\*\generator' }
{'LibreOffice/5.0.3.2$Linux_X86_64' }
{'LibreOffice_project/00m0$Build-2}{\info{\author' }
{'fahrekin' }
{'}{\creatim\yr2023\mo12\dy8\hr6\min39}{\author' }
{'fahrekin' }
{'}{\revtim\yr2023\mo12\dy8\hr6\min42}{\printim\yr0\mo0\dy0\hr0\min0}}\deftab709↵\viewscale100↵{\*\pgdsctbl↵{\pgdsc0\pgdscuse451\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\pgdscnxt0' }
{'Default' }
{'Style;}}↵\formshade\paperh15840\paperw12240\margl1134\margr1134\margt1134\margb1134\sectd\sbknone\sectunlocked1\pgndec\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\ftnbj\ftnstart1\ftnrstcont\ftnnar\aenddoc\aftnrstcont\aftnstart1\aftnnrlc↵{\*\ftnsep\chftnsep}\pgndec\pard\plain' }
{'\s0\nowidctlpar\hyphpar0\cf0\kerning1\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\loch\f3\fs24\lang1033{\rtlch' }
{'\ltrch\loch↵blah' }
{'blah' }
{'blah' }
{'}↵\par' }
{'\pard\plain' }
{'\s0\nowidctlpar\hyphpar0\cf0\kerning1\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\loch\f3\fs24\lang1033{\cf18\rtlch' }
{'\ltrch\loch↵apple}{\rtlch' }
{'\ltrch\loch↵' }
{'}{\cf7\rtlch' }
{'\ltrch\loch↵banana}{\rtlch' }
{'\ltrch\loch↵' }
{'}{\cf17\rtlch' }
{'\ltrch\loch↵orange}↵\par' }
{'\pard\plain' }
{'\s0\nowidctlpar\hyphpar0\cf0\kerning1\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\loch\f3\fs24\lang1033{\b\ab\rtlch' }
{'\ltrch\loch↵cat' }
{'dog' }
{'hamster}↵\par' }
{'\pard\plain' }
{'\s0\nowidctlpar\hyphpar0\cf0\kerning1\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\loch\f3\fs24\lang1033{\rtlch' }
{'\ltrch\loch↵word1' }
{'}{\i\ai\rtlch' }
{'\ltrch\loch↵word1}{\rtlch' }
{'\ltrch\loch↵' }
{'word2' }
{'}{\ul\ulc0\rtlch' }
{'\ltrch\loch↵word3}{\rtlch' }
{'\ltrch\loch↵' }
{'word3' }
{'word3}↵\par' }
{'}'

Categories

Find more on Characters and Strings in Help Center and File Exchange

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!