Skip to content

multi language open tasks #2315

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ All supported languages and their supported versions are listed below.
| [EMF Model](https://www.eclipse.org/modeling/emf/) | 2.25.0 | emf-model | alpha | EMF |
| [SCXML](https://www.w3.org/TR/scxml/) | 1.0 | scxml | alpha | XML |
| Text (naive, use with caution) | - | text | legacy | CoreNLP |
| Multi-Language | - | multi | alpha | - |

## Download and Installation
You need Java SE 21 to run or build JPlag.
Expand Down
14 changes: 14 additions & 0 deletions language-api/src/main/java/de/jplag/Language.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,18 @@ default boolean supportsNormalization() {
default boolean requiresCoreNormalization() {
return true;
}

/**
 * Indicates whether the multi-language module may delegate files to this language module. The default implementation
 * opts in; language modules that should be left out override this method to return {@code false}.
 * @return True, if the language module can be used by the multi-language module
 */
default boolean supportsMultilanguage() {
return true;
}

/**
 * Indicates whether this language module is preferred when more than one language module is applicable. The default
 * implementation grants no priority; language modules that should win such ambiguities override this method to return
 * {@code true}.
 * @return True, if the language module should be used in case of ambiguities
 */
default boolean hasPriority() {
return false;
}
}
5 changes: 5 additions & 0 deletions languages/cpp/src/main/java/de/jplag/cpp/CPPLanguage.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,9 @@ public boolean supportsNormalization() {
public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
return new CPPParserAdapter().parse(files);
}

/**
 * Marks the C++ language module as the one to use in case of ambiguities between language modules.
 * @return always {@code true}
 */
@Override
public boolean hasPriority() {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,9 @@ public List<File> customizeSubmissionOrder(List<File> sub) {
public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
return new DynamicModelParser().parse(files, normalize);
}

/**
 * Opts this language module out of being used by the multi-language module.
 * @return always {@code false}
 */
@Override
public boolean supportsMultilanguage() {
return false;
}
}
7 changes: 7 additions & 0 deletions languages/multi-language/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Multi-Language module

This module allows parsing multiple languages in one run of JPlag. The parsing will be delegated to a different language-module per file.

This does not entail comparing implementations of the same functionality in different languages to each other, but allows comparing submissions that use more than one language each.

By default, all supported languages (except text) are parsed. This can be changed by specifying the language modules to use with the --languages parameter: `java -jar jplag.jar <root folder> multi --languages <firstLanguage>,<secondLanguage>,...`
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ public class MultiLanguageOptions extends LanguageOptions {
public List<Language> getLanguages() {
if (this.languages == null) {
if (languageNames.getValue() == null) {
throw new IllegalArgumentException(ERROR_NOT_ENOUGH_LANGUAGES);
this.languages = LanguageLoader.getAllAvailableLanguages().values().stream().filter(Language::supportsMultilanguage).toList();
} else {
this.languages = Arrays.stream(languageNames.getValue().split(","))
.map(name -> LanguageLoader.getLanguage(name)
.orElseThrow(() -> new IllegalArgumentException(String.format(ERROR_LANGUAGE_NOT_FOUND, name))))
.filter(language -> !language.getClass().equals(MultiLanguage.class)).toList();
}

this.languages = Arrays.stream(languageNames.getValue().split(","))
.map(name -> LanguageLoader.getLanguage(name)
.orElseThrow(() -> new IllegalArgumentException(String.format(ERROR_LANGUAGE_NOT_FOUND, name))))
.filter(language -> !language.getClass().equals(MultiLanguage.class)).toList();

if (this.languages.isEmpty()) {
throw new IllegalArgumentException(ERROR_NOT_ENOUGH_LANGUAGES);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,28 @@
import java.util.Locale;
import java.util.Optional;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.jplag.Language;
import de.jplag.ParsingException;
import de.jplag.Token;

public class MultiLanguageParser {
private static final Logger LOG = LoggerFactory.getLogger(MultiLanguageParser.class);
private static final String WARNING = "This module only allows parsing of multiple languages. No comparisons will be made between languages";
private static final String ERROR_MULTIPLE_PRIORITY_LANGUAGES = "Multiple language modules with priority (%s) have been found for file: %s";
private static final String ERROR_MULTIPLE_LANGUAGES = "Multiple language modules (%s) have been found for file: %s";
private final List<Language> languages;
private static boolean hasPrintedWarning;

/**
 * Creates a parser that delegates to the language modules provided by the given options.
 * @param options the multi-language options supplying the language modules to use
 */
public MultiLanguageParser(MultiLanguageOptions options) {
this.languages = options.getLanguages();
// NOTE(review): hasPrintedWarning is static, so every new parser instance re-arms the warning for all instances —
// confirm that logging the warning again after each construction is intended.
hasPrintedWarning = false;
}

public List<Token> parseFiles(Set<File> files, boolean normalize) throws ParsingException {
this.printWarning();
List<Token> results = new ArrayList<>();
for (File file : files) {
Optional<Language> language = findLanguageForFile(file);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We discussed that we can find out at compile time whether the mapping of file extensions to language modules is well-defined. I would argue then that we should set up an actual look up table instead of repeating the same steps for each individual file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a specific warning, but a general one that is printed the first time the language module is used. See #2304

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If someone wants to use the content and not only the file extensions: https://github.com/ArDoCo/magika (we ported Magika to Java) :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could do that, but I'd recommend doing it in a separate PR and everywhere in JPlag

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I meant it as general information. I would not implement it in this PR :)

Expand All @@ -29,8 +39,45 @@ public List<Token> parseFiles(Set<File> files, boolean normalize) throws Parsing
return results;
}

private Optional<Language> findLanguageForFile(File file) {
return this.languages.stream().filter(language -> Arrays.stream(language.suffixes()).anyMatch(suffix -> file.getName().endsWith(suffix)))
.findFirst();
/**
 * Determines which of the configured language modules is responsible for the given file.
 * <p>
 * A language module is a candidate if the file name ends with one of its suffixes, compared case-insensitively. The
 * comparison lower-cases both sides with {@link Locale#ROOT} so that the result does not depend on the default locale
 * of the JVM (for example, under a Turkish locale an upper-case {@code I} would not lower-case to {@code i}).
 * <p>
 * Candidates that report {@link Language#hasPriority()} take precedence over all other candidates. Within the group
 * that is finally considered, exactly one candidate must remain.
 * @param file the file to find a language module for
 * @return the responsible language module, or an empty optional if no configured module matches the file name
 * @throws ParsingException if more than one priority module matches, or if no priority module matches and more than
 * one other module matches
 */
private Optional<Language> findLanguageForFile(File file) throws ParsingException {
    // Lower-case the file name once instead of once per suffix.
    String fileName = file.getName().toLowerCase(Locale.ROOT);
    List<Language> normalLanguages = new ArrayList<>();
    List<Language> priorityLanguages = new ArrayList<>();

    for (Language language : this.languages) {
        boolean suffixMatches = Arrays.stream(language.suffixes())
                .anyMatch(suffix -> fileName.endsWith(suffix.toLowerCase(Locale.ROOT)));
        if (!suffixMatches) {
            continue;
        }

        if (language.hasPriority()) {
            priorityLanguages.add(language);
        } else {
            normalLanguages.add(language);
        }
    }

    // Priority modules shadow all non-priority candidates.
    if (!priorityLanguages.isEmpty()) {
        return Optional.of(requireUnambiguous(priorityLanguages, file, ERROR_MULTIPLE_PRIORITY_LANGUAGES));
    }

    if (normalLanguages.isEmpty()) {
        return Optional.empty();
    }

    return Optional.of(requireUnambiguous(normalLanguages, file, ERROR_MULTIPLE_LANGUAGES));
}

/**
 * Returns the single candidate of a non-empty list, failing if there is more than one.
 * @param candidates the non-empty list of matching language modules
 * @param file the file the candidates were matched against, used for error reporting
 * @param messageTemplate format string taking the joined candidate names and the file path
 * @return the only candidate
 * @throws ParsingException if the list holds more than one candidate
 */
private static Language requireUnambiguous(List<Language> candidates, File file, String messageTemplate) throws ParsingException {
    if (candidates.size() > 1) {
        String names = String.join(", ", candidates.stream().map(Language::getName).toList());
        throw new ParsingException(file, String.format(messageTemplate, names, file.getPath()));
    }

    return candidates.getFirst();
}

/**
 * Logs the general multi-language warning, unless the flag shows that it has already been logged.
 */
private void printWarning() {
    if (hasPrintedWarning) {
        return;
    }

    // Set the flag before logging so the warning is emitted only once per reset of the flag.
    hasPrintedWarning = true;
    LOG.warn(WARNING);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,6 @@ void testMultiLanguageParsing() throws ParsingException {
Assertions.assertEquals(expectedTokens, tokens.stream().map(Token::getType).toList());
}

/**
 * Expects parsing to be rejected with an {@link IllegalArgumentException} when no languages were configured on the
 * multi-language module.
 */
@Test
void testNoLanguagesConfigured() {
    MultiLanguage unconfiguredModule = new MultiLanguage();
    Assertions.assertThrowsExactly(IllegalArgumentException.class, () -> unconfiguredModule.parse(Set.of(javaCode, cppCode), false));
}

@Test
void testInvalidLanguage() {
MultiLanguage languageModule = new MultiLanguage();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,9 @@ public int minimumTokenMatch() {
public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
return new ParserAdapter().parse(files);
}

/**
 * Opts this language module out of being used by the multi-language module.
 * @return always {@code false}
 */
@Override
public boolean supportsMultilanguage() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,9 @@ public TypeScriptLanguageOptions getOptions() {
public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
return new TypeScriptParserAdapter(options.useStrictDefault()).parse(files);
}

/**
 * Marks the TypeScript language module as the one to use in case of ambiguities between language modules.
 * @return always {@code true}
 */
@Override
public boolean hasPriority() {
return true;
}
}
Loading